diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index bbde3c49f64c6..0c5d1445d36e8 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -671,12 +671,21 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit( } // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not -// memory. They should have been removed by now. -static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) { +// memory. They should have been removed by now, except CFI Saved Reg spills. +static bool allStackObjectsAreDead(const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E; ++I) { - if (!MFI.isDeadObjectIndex(I)) + if (!MFI.isDeadObjectIndex(I)) { + // determineCalleeSaves() might have added the SGPRSpill stack IDs for + // CFI saves into scratch VGPR, so ignore them. + if (MFI.getStackID(I) == TargetStackID::SGPRSpill && + FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) { + continue; + } return false; + } } return true; @@ -696,8 +705,8 @@ Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg( Register ScratchRsrcReg = MFI->getScratchRSrcReg(); - if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) && - allStackObjectsAreDead(MF.getFrameInfo()))) + if (!ScratchRsrcReg || + (!MRI.isPhysRegUsed(ScratchRsrcReg) && allStackObjectsAreDead(MF))) return Register(); if (ST.hasSGPRInitBug() || @@ -925,7 +934,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, bool NeedsFlatScratchInit = MFI->getUserSGPRInfo().hasFlatScratchInit() && (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() || - (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch())); + (!allStackObjectsAreDead(MF) && ST.enableFlatScratch())); if ((NeedsFlatScratchInit || ScratchRsrcReg) && 
PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) { @@ -1306,6 +1315,11 @@ void SIFrameLowering::emitCSRSpillStores(MachineFunction &MF, LiveUnits.addReg(Reg); } } + + // Remove the spill entry created for EXEC. It is needed only for CFISaves in + // the prologue. + if (TRI.isCFISavedRegsSpillEnabled()) + FuncInfo->removePrologEpilogSGPRSpillEntry(TRI.getExec()); } void SIFrameLowering::emitCSRSpillRestores( @@ -1789,14 +1803,14 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( // can. Any remaining SGPR spills will go to memory, so move them back to the // default stack. bool HaveSGPRToVMemSpill = - FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true); + FuncInfo->removeDeadFrameIndices(MF, /*ResetSGPRSpillStackIDs*/ true); assert(allSGPRSpillsAreDead(MF) && "SGPR spill should have been removed in SILowerSGPRSpills"); // FIXME: The other checks should be redundant with allStackObjectsAreDead, // but currently hasNonSpillStackObjects is set only from source // allocas. Stack temps produced from legalization are not counted currently. - if (!allStackObjectsAreDead(MFI)) { + if (!allStackObjectsAreDead(MF)) { assert(RS && "RegScavenger required if spilling"); // Add an emergency spill slot @@ -1896,6 +1910,18 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves( MFI->setSGPRForEXECCopy(AMDGPU::NoRegister); } + if (TRI->isCFISavedRegsSpillEnabled()) { + Register Exec = TRI->getExec(); + assert(!MFI->hasPrologEpilogSGPRSpillEntry(Exec) && + "Re-reserving spill slot for EXEC"); + // FIXME: Machine Copy Propagation currently optimizes away the EXEC copy to + // the scratch as we emit it only in the prolog. This optimization should + // not happen for frame related instructions. Until this is fixed ignore + // copy to scratch SGPR. + getVGPRSpillLaneOrTempRegister(MF, LiveUnits, Exec, RC, + /*IncludeScratchCopy=*/false); + } + // hasFP only knows about stack objects that already exist. 
We're now // determining the stack slots that will be created, so we have to predict // them. Stack objects force FP usage with calls. @@ -1905,8 +1931,7 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves( // // FIXME: Is this really hasReservedCallFrame? const bool WillHaveFP = - FrameInfo.hasCalls() && - (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); + FrameInfo.hasCalls() && (SavedVGPRs.any() || !allStackObjectsAreDead(MF)); if (WillHaveFP || hasFP(MF)) { Register FramePtrReg = MFI->getFrameOffsetReg(); diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 2b716db0b7a22..526404eb83b4f 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -114,6 +114,13 @@ class SIFrameLowering final : public AMDGPUFrameLowering { public: bool requiresStackPointerReference(const MachineFunction &MF) const; + /// If '-amdgpu-spill-cfi-saved-regs' is enabled, emit RA/EXEC spills to + /// a free VGPR (lanes) or memory and corresponding CFI rules. + void emitCFISavedRegSpills(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + LiveRegUnits &LiveRegs, + bool emitSpillsToMem) const; + /// Create a CFI index for CFIInst and build a MachineInstr around it. MachineInstr * buildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 62386da94d854..57ff52334a470 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -531,7 +531,7 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) { // free frame index ids by the later pass(es) like "stack slot coloring" // which in turn could mess-up with the book keeping of "frame index to VGPR // lane". 
- FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false); + FuncInfo->removeDeadFrameIndices(MF, /*ResetSGPRSpillStackIDs*/ false); MadeChange = true; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index b398db4f7caff..2c275a85440d9 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -566,7 +566,8 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, } bool SIMachineFunctionInfo::removeDeadFrameIndices( - MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) { + MachineFunction &MF, bool ResetSGPRSpillStackIDs) { + MachineFrameInfo &MFI = MF.getFrameInfo(); // Remove dead frame indices from function frame, however keep FP & BP since // spills for them haven't been inserted yet. And also make sure to remove the // frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure, diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 2c1a13c345aac..8ca34d10de0ef 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -752,6 +752,16 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, }) != PrologEpilogSGPRSpills.end(); } + // Remove the entry created for \p Reg, if one exists. 
+ void removePrologEpilogSGPRSpillEntry(Register Reg) { + auto I = find_if(PrologEpilogSGPRSpills, + [&Reg](const auto &Spill) { return Spill.first == Reg; }); + if (I == PrologEpilogSGPRSpills.end()) + return; + + PrologEpilogSGPRSpills.erase(I); + } + const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const { const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) { @@ -830,8 +840,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill /// to the default stack. - bool removeDeadFrameIndices(MachineFrameInfo &MFI, - bool ResetSGPRSpillStackIDs); + bool removeDeadFrameIndices(MachineFunction &MF, bool ResetSGPRSpillStackIDs); int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI); std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 77608a4cfc751..9677c6cd7806c 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -35,6 +35,11 @@ static cl::opt<bool> EnableSpillSGPRToVGPR( cl::ReallyHidden, cl::init(true)); +static cl::opt<bool> EnableSpillCFISavedRegs( + "amdgpu-spill-cfi-saved-regs", + cl::desc("Enable spilling the registers required for CFI emission"), + cl::ReallyHidden, cl::init(false), cl::ZeroOrMore); + std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts; std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable; @@ -559,6 +564,10 @@ unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel, return SubRegFromChannelTable[NumRegIndex - 1][Channel]; } +bool SIRegisterInfo::isCFISavedRegsSpillEnabled() const { + return EnableSpillCFISavedRegs; +} + MCRegister SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF, const unsigned Align, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 
2dae5f0eb1c69..749583722e3b0 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -80,6 +80,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { return SpillSGPRToVGPR; } + bool isCFISavedRegsSpillEnabled() const; + /// Return the largest available SGPR aligned to \p Align for the register /// class \p RC. MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll new file mode 100644 index 0000000000000..c804c75ae7d2c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll @@ -0,0 +1,2556 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-spill-cfi-saved-regs -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,WAVE64 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -amdgpu-spill-cfi-saved-regs -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,WAVE32 %s + +define protected amdgpu_kernel void @kern() #0 { +; CHECK-LABEL: kern: +; CHECK: .Lfunc_begin0: +; CHECK-NEXT: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 ; +; CHECK-NEXT: .cfi_undefined 16 +; CHECK-NEXT: s_endpgm +entry: + ret void +} + +define hidden void @func_saved_in_clobbered_vgpr() #0 { +; WAVE64-LABEL: func_saved_in_clobbered_vgpr: +; WAVE64: .Lfunc_begin1: +; WAVE64-NEXT: .cfi_startproc +; WAVE64-NEXT: ; %bb.0: ; %entry +; WAVE64-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE64-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE64-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; 
WAVE64-NEXT: .cfi_offset 2560, 0 +; WAVE64-NEXT: s_mov_b64 exec, s[4:5] +; WAVE64-NEXT: v_writelane_b32 v0, exec_lo, 0 +; WAVE64-NEXT: v_writelane_b32 v0, exec_hi, 1 +; WAVE64-NEXT: .cfi_llvm_vector_registers 17, 2560, 0, 32, 2560, 1, 32 +; WAVE64-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; WAVE64-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; WAVE64-NEXT: s_mov_b64 exec, s[4:5] +; WAVE64-NEXT: s_waitcnt vmcnt(0) +; WAVE64-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: func_saved_in_clobbered_vgpr: +; WAVE32: .Lfunc_begin1: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: ; %entry +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_xor_saveexec_b32 s4, -1 +; WAVE32-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_offset 1536, 0 +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s4 +; WAVE32-NEXT: v_writelane_b32 v0, exec_lo, 0 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1, 1536, 0, 32 +; WAVE32-NEXT: s_xor_saveexec_b32 s4, -1 +; WAVE32-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] +entry: + ret void +} + +; Check that the option causes a CSR VGPR to spill when needed. 
+define hidden void @func_saved_in_preserved_vgpr() #0 { +; WAVE64-LABEL: func_saved_in_preserved_vgpr: +; WAVE64: .Lfunc_begin2: +; WAVE64-NEXT: .cfi_startproc +; WAVE64-NEXT: ; %bb.0: ; %entry +; WAVE64-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE64-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE64-NEXT: .cfi_undefined 2560 +; WAVE64-NEXT: .cfi_undefined 2561 +; WAVE64-NEXT: .cfi_undefined 2562 +; WAVE64-NEXT: .cfi_undefined 2563 +; WAVE64-NEXT: .cfi_undefined 2564 +; WAVE64-NEXT: .cfi_undefined 2565 +; WAVE64-NEXT: .cfi_undefined 2566 +; WAVE64-NEXT: .cfi_undefined 2567 +; WAVE64-NEXT: .cfi_undefined 2568 +; WAVE64-NEXT: .cfi_undefined 2569 +; WAVE64-NEXT: .cfi_undefined 2570 +; WAVE64-NEXT: .cfi_undefined 2571 +; WAVE64-NEXT: .cfi_undefined 2572 +; WAVE64-NEXT: .cfi_undefined 2573 +; WAVE64-NEXT: .cfi_undefined 2574 +; WAVE64-NEXT: .cfi_undefined 2575 +; WAVE64-NEXT: .cfi_undefined 2576 +; WAVE64-NEXT: .cfi_undefined 2577 +; WAVE64-NEXT: .cfi_undefined 2578 +; WAVE64-NEXT: .cfi_undefined 2579 +; WAVE64-NEXT: .cfi_undefined 2580 +; WAVE64-NEXT: .cfi_undefined 2581 +; WAVE64-NEXT: .cfi_undefined 2582 +; WAVE64-NEXT: .cfi_undefined 2583 +; WAVE64-NEXT: .cfi_undefined 2584 +; WAVE64-NEXT: .cfi_undefined 2585 +; WAVE64-NEXT: .cfi_undefined 2586 +; WAVE64-NEXT: .cfi_undefined 2587 +; WAVE64-NEXT: .cfi_undefined 2588 +; WAVE64-NEXT: .cfi_undefined 2589 +; WAVE64-NEXT: .cfi_undefined 2590 +; WAVE64-NEXT: .cfi_undefined 2591 +; WAVE64-NEXT: .cfi_undefined 2592 +; WAVE64-NEXT: .cfi_undefined 2593 +; WAVE64-NEXT: .cfi_undefined 2594 +; WAVE64-NEXT: .cfi_undefined 2595 +; WAVE64-NEXT: .cfi_undefined 2596 +; WAVE64-NEXT: .cfi_undefined 2597 +; WAVE64-NEXT: .cfi_undefined 2598 +; WAVE64-NEXT: .cfi_undefined 2599 +; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE64-NEXT: s_or_saveexec_b64 s[4:5], -1 +; WAVE64-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_offset 2600, 0 +; WAVE64-NEXT: s_mov_b64 exec, s[4:5] 
+; WAVE64-NEXT: v_writelane_b32 v40, exec_lo, 0 +; WAVE64-NEXT: v_writelane_b32 v40, exec_hi, 1 +; WAVE64-NEXT: .cfi_llvm_vector_registers 17, 2600, 0, 32, 2600, 1, 32 +; WAVE64-NEXT: ;;#ASMSTART +; WAVE64-NEXT: ; clobber nonpreserved VGPRs +; WAVE64-NEXT: ;;#ASMEND +; WAVE64-NEXT: s_or_saveexec_b64 s[4:5], -1 +; WAVE64-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; WAVE64-NEXT: s_mov_b64 exec, s[4:5] +; WAVE64-NEXT: s_waitcnt vmcnt(0) +; WAVE64-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: func_saved_in_preserved_vgpr: +; WAVE32: .Lfunc_begin2: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: ; %entry +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: .cfi_undefined 1536 +; WAVE32-NEXT: .cfi_undefined 1537 +; WAVE32-NEXT: .cfi_undefined 1538 +; WAVE32-NEXT: .cfi_undefined 1539 +; WAVE32-NEXT: .cfi_undefined 1540 +; WAVE32-NEXT: .cfi_undefined 1541 +; WAVE32-NEXT: .cfi_undefined 1542 +; WAVE32-NEXT: .cfi_undefined 1543 +; WAVE32-NEXT: .cfi_undefined 1544 +; WAVE32-NEXT: .cfi_undefined 1545 +; WAVE32-NEXT: .cfi_undefined 1546 +; WAVE32-NEXT: .cfi_undefined 1547 +; WAVE32-NEXT: .cfi_undefined 1548 +; WAVE32-NEXT: .cfi_undefined 1549 +; WAVE32-NEXT: .cfi_undefined 1550 +; WAVE32-NEXT: .cfi_undefined 1551 +; WAVE32-NEXT: .cfi_undefined 1552 +; WAVE32-NEXT: .cfi_undefined 1553 +; WAVE32-NEXT: .cfi_undefined 1554 +; WAVE32-NEXT: .cfi_undefined 1555 +; WAVE32-NEXT: .cfi_undefined 1556 +; WAVE32-NEXT: .cfi_undefined 1557 +; WAVE32-NEXT: .cfi_undefined 1558 +; WAVE32-NEXT: .cfi_undefined 1559 +; WAVE32-NEXT: .cfi_undefined 1560 +; WAVE32-NEXT: .cfi_undefined 1561 +; WAVE32-NEXT: .cfi_undefined 1562 +; WAVE32-NEXT: .cfi_undefined 1563 +; WAVE32-NEXT: .cfi_undefined 1564 +; WAVE32-NEXT: .cfi_undefined 1565 +; WAVE32-NEXT: .cfi_undefined 1566 +; WAVE32-NEXT: .cfi_undefined 1567 +; WAVE32-NEXT: .cfi_undefined 1568 +; WAVE32-NEXT: .cfi_undefined 1569 +; WAVE32-NEXT: 
.cfi_undefined 1570 +; WAVE32-NEXT: .cfi_undefined 1571 +; WAVE32-NEXT: .cfi_undefined 1572 +; WAVE32-NEXT: .cfi_undefined 1573 +; WAVE32-NEXT: .cfi_undefined 1574 +; WAVE32-NEXT: .cfi_undefined 1575 +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_or_saveexec_b32 s4, -1 +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_offset 1576, 0 +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s4 +; WAVE32-NEXT: v_writelane_b32 v40, exec_lo, 0 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1, 1576, 0, 32 +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber nonpreserved VGPRs +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_or_saveexec_b32 s4, -1 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] +entry: + call void asm sideeffect "; clobber nonpreserved VGPRs", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} + ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} + ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} + ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() + ret void +} + +; There's no return here, so the return address live in was deleted. 
+define void @empty_func() { +; WAVE64-LABEL: empty_func: +; WAVE64: .Lfunc_begin3: +; WAVE64-NEXT: .cfi_startproc +; WAVE64-NEXT: ; %bb.0: +; WAVE64-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE64-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE64-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_offset 2560, 0 +; WAVE64-NEXT: s_mov_b64 exec, s[4:5] +; WAVE64-NEXT: v_writelane_b32 v0, exec_lo, 0 +; WAVE64-NEXT: v_writelane_b32 v0, exec_hi, 1 +; +; WAVE32-LABEL: empty_func: +; WAVE32: .Lfunc_begin3: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_xor_saveexec_b32 s4, -1 +; WAVE32-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_offset 1536, 0 +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s4 +; WAVE32-NEXT: v_writelane_b32 v0, exec_lo, 0 + unreachable +} + +; Check that the option causes RA and EXEC to be spilled to memory. 
+define void @no_vgprs_to_spill_into() #1 { +; WAVE64-LABEL: no_vgprs_to_spill_into: +; WAVE64: .Lfunc_begin4: +; WAVE64-NEXT: .cfi_startproc +; WAVE64-NEXT: ; %bb.0: +; WAVE64-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE64-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE64-NEXT: .cfi_undefined 2560 +; WAVE64-NEXT: .cfi_undefined 2561 +; WAVE64-NEXT: .cfi_undefined 2562 +; WAVE64-NEXT: .cfi_undefined 2563 +; WAVE64-NEXT: .cfi_undefined 2564 +; WAVE64-NEXT: .cfi_undefined 2565 +; WAVE64-NEXT: .cfi_undefined 2566 +; WAVE64-NEXT: .cfi_undefined 2567 +; WAVE64-NEXT: .cfi_undefined 2568 +; WAVE64-NEXT: .cfi_undefined 2569 +; WAVE64-NEXT: .cfi_undefined 2570 +; WAVE64-NEXT: .cfi_undefined 2571 +; WAVE64-NEXT: .cfi_undefined 2572 +; WAVE64-NEXT: .cfi_undefined 2573 +; WAVE64-NEXT: .cfi_undefined 2574 +; WAVE64-NEXT: .cfi_undefined 2575 +; WAVE64-NEXT: .cfi_undefined 2576 +; WAVE64-NEXT: .cfi_undefined 2577 +; WAVE64-NEXT: .cfi_undefined 2578 +; WAVE64-NEXT: .cfi_undefined 2579 +; WAVE64-NEXT: .cfi_undefined 2580 +; WAVE64-NEXT: .cfi_undefined 2581 +; WAVE64-NEXT: .cfi_undefined 2582 +; WAVE64-NEXT: .cfi_undefined 2583 +; WAVE64-NEXT: .cfi_undefined 2584 +; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE64-NEXT: v_mov_b32_e32 v0, exec_lo +; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE64-NEXT: v_mov_b32_e32 v0, exec_hi +; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_offset 17, 0 +; WAVE64-NEXT: ;;#ASMSTART +; WAVE64-NEXT: ;;#ASMEND +; WAVE64-NEXT: s_waitcnt vmcnt(0) +; WAVE64-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: no_vgprs_to_spill_into: +; WAVE32: .Lfunc_begin4: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: .cfi_undefined 1536 +; WAVE32-NEXT: .cfi_undefined 1537 +; WAVE32-NEXT: .cfi_undefined 1538 +; WAVE32-NEXT: 
.cfi_undefined 1539 +; WAVE32-NEXT: .cfi_undefined 1540 +; WAVE32-NEXT: .cfi_undefined 1541 +; WAVE32-NEXT: .cfi_undefined 1542 +; WAVE32-NEXT: .cfi_undefined 1543 +; WAVE32-NEXT: .cfi_undefined 1544 +; WAVE32-NEXT: .cfi_undefined 1545 +; WAVE32-NEXT: .cfi_undefined 1546 +; WAVE32-NEXT: .cfi_undefined 1547 +; WAVE32-NEXT: .cfi_undefined 1548 +; WAVE32-NEXT: .cfi_undefined 1549 +; WAVE32-NEXT: .cfi_undefined 1550 +; WAVE32-NEXT: .cfi_undefined 1551 +; WAVE32-NEXT: .cfi_undefined 1552 +; WAVE32-NEXT: .cfi_undefined 1553 +; WAVE32-NEXT: .cfi_undefined 1554 +; WAVE32-NEXT: .cfi_undefined 1555 +; WAVE32-NEXT: .cfi_undefined 1556 +; WAVE32-NEXT: .cfi_undefined 1557 +; WAVE32-NEXT: .cfi_undefined 1558 +; WAVE32-NEXT: .cfi_undefined 1559 +; WAVE32-NEXT: .cfi_undefined 1560 +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_xor_saveexec_b32 s4, -1 +; WAVE32-NEXT: buffer_store_dword v25, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_offset 1561, 0 +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s4 +; WAVE32-NEXT: v_writelane_b32 v25, exec_lo, 0 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1, 1561, 0, 32 +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_xor_saveexec_b32 s4, -1 +; WAVE32-NEXT: buffer_load_dword v25, off, s[0:3], s32 ; 4-byte Folded Reload +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} + ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} + ,~{v20},~{v21},~{v22},~{v23},~{v24}"() + + ret void +} + +; Check that the FP and EXEC needs to be spilled to memory, even though +; we have reserved VGPR but there are no available free lanes. 
+define void @callee_need_to_spill_fp_exec_to_memory() #2 { +; WAVE64-LABEL: callee_need_to_spill_fp_exec_to_memory: +; WAVE64: .Lfunc_begin5: +; WAVE64-NEXT: .cfi_startproc +; WAVE64-NEXT: ; %bb.0: +; WAVE64-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE64-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE64-NEXT: .cfi_undefined 2560 +; WAVE64-NEXT: .cfi_undefined 2561 +; WAVE64-NEXT: .cfi_undefined 2562 +; WAVE64-NEXT: .cfi_undefined 2563 +; WAVE64-NEXT: .cfi_undefined 2564 +; WAVE64-NEXT: .cfi_undefined 2565 +; WAVE64-NEXT: .cfi_undefined 2566 +; WAVE64-NEXT: .cfi_undefined 2567 +; WAVE64-NEXT: .cfi_undefined 2568 +; WAVE64-NEXT: .cfi_undefined 2569 +; WAVE64-NEXT: .cfi_undefined 2570 +; WAVE64-NEXT: .cfi_undefined 2571 +; WAVE64-NEXT: .cfi_undefined 2572 +; WAVE64-NEXT: .cfi_undefined 2573 +; WAVE64-NEXT: .cfi_undefined 2574 +; WAVE64-NEXT: .cfi_undefined 2575 +; WAVE64-NEXT: .cfi_undefined 2576 +; WAVE64-NEXT: .cfi_undefined 2577 +; WAVE64-NEXT: .cfi_undefined 2578 +; WAVE64-NEXT: .cfi_undefined 2579 +; WAVE64-NEXT: .cfi_undefined 2580 +; WAVE64-NEXT: .cfi_undefined 2581 +; WAVE64-NEXT: .cfi_undefined 2582 +; WAVE64-NEXT: .cfi_undefined 2583 +; WAVE64-NEXT: .cfi_undefined 2584 +; WAVE64-NEXT: .cfi_undefined 2585 +; WAVE64-NEXT: .cfi_undefined 2586 +; WAVE64-NEXT: .cfi_undefined 2587 +; WAVE64-NEXT: .cfi_undefined 2588 +; WAVE64-NEXT: .cfi_undefined 2589 +; WAVE64-NEXT: .cfi_undefined 2590 +; WAVE64-NEXT: .cfi_undefined 2591 +; WAVE64-NEXT: .cfi_undefined 2592 +; WAVE64-NEXT: .cfi_undefined 2593 +; WAVE64-NEXT: .cfi_undefined 2594 +; WAVE64-NEXT: .cfi_undefined 2595 +; WAVE64-NEXT: .cfi_undefined 2596 +; WAVE64-NEXT: .cfi_undefined 2597 +; WAVE64-NEXT: .cfi_undefined 2598 +; WAVE64-NEXT: .cfi_undefined 2599 +; WAVE64-NEXT: .cfi_undefined 2608 +; WAVE64-NEXT: .cfi_undefined 2609 +; WAVE64-NEXT: .cfi_undefined 2610 +; WAVE64-NEXT: .cfi_undefined 2611 +; WAVE64-NEXT: .cfi_undefined 2612 +; WAVE64-NEXT: .cfi_undefined 2613 +; WAVE64-NEXT: .cfi_undefined 2614 
+; WAVE64-NEXT: .cfi_undefined 2615 +; WAVE64-NEXT: .cfi_undefined 2624 +; WAVE64-NEXT: .cfi_undefined 2625 +; WAVE64-NEXT: .cfi_undefined 2626 +; WAVE64-NEXT: .cfi_undefined 2627 +; WAVE64-NEXT: .cfi_undefined 2628 +; WAVE64-NEXT: .cfi_undefined 2629 +; WAVE64-NEXT: .cfi_undefined 2630 +; WAVE64-NEXT: .cfi_undefined 2631 +; WAVE64-NEXT: .cfi_undefined 2640 +; WAVE64-NEXT: .cfi_undefined 2641 +; WAVE64-NEXT: .cfi_undefined 2642 +; WAVE64-NEXT: .cfi_undefined 2643 +; WAVE64-NEXT: .cfi_undefined 2644 +; WAVE64-NEXT: .cfi_undefined 2645 +; WAVE64-NEXT: .cfi_undefined 2646 +; WAVE64-NEXT: .cfi_undefined 2647 +; WAVE64-NEXT: .cfi_undefined 2656 +; WAVE64-NEXT: .cfi_undefined 2657 +; WAVE64-NEXT: .cfi_undefined 2658 +; WAVE64-NEXT: .cfi_undefined 2659 +; WAVE64-NEXT: .cfi_undefined 2660 +; WAVE64-NEXT: .cfi_undefined 2661 +; WAVE64-NEXT: .cfi_undefined 2662 +; WAVE64-NEXT: .cfi_undefined 2663 +; WAVE64-NEXT: .cfi_undefined 2672 +; WAVE64-NEXT: .cfi_undefined 2673 +; WAVE64-NEXT: .cfi_undefined 2674 +; WAVE64-NEXT: .cfi_undefined 2675 +; WAVE64-NEXT: .cfi_undefined 2676 +; WAVE64-NEXT: .cfi_undefined 2677 +; WAVE64-NEXT: .cfi_undefined 2678 +; WAVE64-NEXT: .cfi_undefined 2679 +; WAVE64-NEXT: .cfi_undefined 2688 +; WAVE64-NEXT: .cfi_undefined 2689 +; WAVE64-NEXT: .cfi_undefined 36 +; WAVE64-NEXT: .cfi_undefined 37 +; WAVE64-NEXT: .cfi_undefined 38 +; WAVE64-NEXT: .cfi_undefined 39 +; WAVE64-NEXT: .cfi_undefined 40 +; WAVE64-NEXT: .cfi_undefined 41 +; WAVE64-NEXT: .cfi_undefined 42 +; WAVE64-NEXT: .cfi_undefined 43 +; WAVE64-NEXT: .cfi_undefined 44 +; WAVE64-NEXT: .cfi_undefined 45 +; WAVE64-NEXT: .cfi_undefined 46 +; WAVE64-NEXT: .cfi_undefined 47 +; WAVE64-NEXT: .cfi_undefined 48 +; WAVE64-NEXT: .cfi_undefined 49 +; WAVE64-NEXT: .cfi_undefined 50 +; WAVE64-NEXT: .cfi_undefined 51 +; WAVE64-NEXT: .cfi_undefined 52 +; WAVE64-NEXT: .cfi_undefined 53 +; WAVE64-NEXT: .cfi_undefined 54 +; WAVE64-NEXT: .cfi_undefined 55 +; WAVE64-NEXT: .cfi_undefined 56 +; WAVE64-NEXT: 
.cfi_undefined 57 +; WAVE64-NEXT: .cfi_undefined 58 +; WAVE64-NEXT: .cfi_undefined 59 +; WAVE64-NEXT: .cfi_undefined 60 +; WAVE64-NEXT: .cfi_undefined 61 +; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE64-NEXT: s_mov_b32 s40, s33 +; WAVE64-NEXT: .cfi_register 65, 72 +; WAVE64-NEXT: s_mov_b32 s33, s32 +; WAVE64-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; WAVE64-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_offset 2599, 12288 +; WAVE64-NEXT: s_mov_b64 exec, s[4:5] +; WAVE64-NEXT: v_writelane_b32 v39, exec_lo, 32 +; WAVE64-NEXT: v_writelane_b32 v39, exec_hi, 33 +; WAVE64-NEXT: .cfi_llvm_vector_registers 17, 2599, 32, 32, 2599, 33, 32 +; WAVE64-NEXT: .cfi_def_cfa_register 65 +; WAVE64-NEXT: s_addk_i32 s32, 0x3200 +; WAVE64-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 12032 +; WAVE64-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 11776 +; WAVE64-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2602, 32, 17, 64, 11520 +; WAVE64-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2603, 32, 17, 64, 11264 +; WAVE64-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2604, 32, 17, 64, 11008 +; WAVE64-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2605, 32, 17, 64, 10752 +; WAVE64-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2606, 32, 17, 64, 10496 +; WAVE64-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2607, 
32, 17, 64, 10240 +; WAVE64-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2616, 32, 17, 64, 9984 +; WAVE64-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2617, 32, 17, 64, 9728 +; WAVE64-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2618, 32, 17, 64, 9472 +; WAVE64-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2619, 32, 17, 64, 9216 +; WAVE64-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2620, 32, 17, 64, 8960 +; WAVE64-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2621, 32, 17, 64, 8704 +; WAVE64-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2622, 32, 17, 64, 8448 +; WAVE64-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2623, 32, 17, 64, 8192 +; WAVE64-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2632, 32, 17, 64, 7936 +; WAVE64-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2633, 32, 17, 64, 7680 +; WAVE64-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2634, 32, 17, 64, 7424 +; WAVE64-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2635, 32, 17, 64, 7168 +; WAVE64-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2636, 32, 17, 
64, 6912 +; WAVE64-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2637, 32, 17, 64, 6656 +; WAVE64-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2638, 32, 17, 64, 6400 +; WAVE64-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2639, 32, 17, 64, 6144 +; WAVE64-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2648, 32, 17, 64, 5888 +; WAVE64-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2649, 32, 17, 64, 5632 +; WAVE64-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2650, 32, 17, 64, 5376 +; WAVE64-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2651, 32, 17, 64, 5120 +; WAVE64-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2652, 32, 17, 64, 4864 +; WAVE64-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2653, 32, 17, 64, 4608 +; WAVE64-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2654, 32, 17, 64, 4352 +; WAVE64-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2655, 32, 17, 64, 4096 +; WAVE64-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2664, 32, 17, 64, 3840 +; WAVE64-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2665, 32, 17, 64, 3584 +; 
WAVE64-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2666, 32, 17, 64, 3328 +; WAVE64-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2667, 32, 17, 64, 3072 +; WAVE64-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2668, 32, 17, 64, 2816 +; WAVE64-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2669, 32, 17, 64, 2560 +; WAVE64-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2670, 32, 17, 64, 2304 +; WAVE64-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2671, 32, 17, 64, 2048 +; WAVE64-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2680, 32, 17, 64, 1792 +; WAVE64-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2681, 32, 17, 64, 1536 +; WAVE64-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2682, 32, 17, 64, 1280 +; WAVE64-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2683, 32, 17, 64, 1024 +; WAVE64-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2684, 32, 17, 64, 768 +; WAVE64-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2685, 32, 17, 64, 512 +; WAVE64-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2686, 32, 17, 64, 256 +; WAVE64-NEXT: 
buffer_store_dword v127, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2687, 32, 17, 64, 0 +; WAVE64-NEXT: v_writelane_b32 v39, s34, 0 +; WAVE64-NEXT: .cfi_llvm_vector_registers 66, 2599, 0, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s35, 1 +; WAVE64-NEXT: .cfi_llvm_vector_registers 67, 2599, 1, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s36, 2 +; WAVE64-NEXT: .cfi_llvm_vector_registers 68, 2599, 2, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s37, 3 +; WAVE64-NEXT: .cfi_llvm_vector_registers 69, 2599, 3, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s38, 4 +; WAVE64-NEXT: .cfi_llvm_vector_registers 70, 2599, 4, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s39, 5 +; WAVE64-NEXT: .cfi_llvm_vector_registers 71, 2599, 5, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s48, 6 +; WAVE64-NEXT: .cfi_llvm_vector_registers 80, 2599, 6, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s49, 7 +; WAVE64-NEXT: .cfi_llvm_vector_registers 81, 2599, 7, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s50, 8 +; WAVE64-NEXT: .cfi_llvm_vector_registers 82, 2599, 8, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s51, 9 +; WAVE64-NEXT: .cfi_llvm_vector_registers 83, 2599, 9, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s52, 10 +; WAVE64-NEXT: .cfi_llvm_vector_registers 84, 2599, 10, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s53, 11 +; WAVE64-NEXT: .cfi_llvm_vector_registers 85, 2599, 11, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s54, 12 +; WAVE64-NEXT: .cfi_llvm_vector_registers 86, 2599, 12, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s55, 13 +; WAVE64-NEXT: .cfi_llvm_vector_registers 87, 2599, 13, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s64, 14 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1088, 2599, 14, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s65, 15 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1089, 2599, 15, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s66, 16 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1090, 2599, 16, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s67, 17 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1091, 2599, 17, 
32 +; WAVE64-NEXT: v_writelane_b32 v39, s68, 18 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1092, 2599, 18, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s69, 19 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1093, 2599, 19, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s70, 20 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1094, 2599, 20, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s71, 21 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1095, 2599, 21, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s80, 22 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1104, 2599, 22, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s81, 23 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1105, 2599, 23, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s82, 24 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1106, 2599, 24, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s83, 25 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1107, 2599, 25, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s84, 26 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1108, 2599, 26, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s85, 27 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1109, 2599, 27, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s86, 28 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1110, 2599, 28, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s87, 29 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1111, 2599, 29, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s96, 30 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1120, 2599, 30, 32 +; WAVE64-NEXT: v_writelane_b32 v39, s97, 31 +; WAVE64-NEXT: .cfi_llvm_vector_registers 1121, 2599, 31, 32 +; WAVE64-NEXT: ;;#ASMSTART +; WAVE64-NEXT: ; clobber nonpreserved and 32 CSR SGPRs +; WAVE64-NEXT: ;;#ASMEND +; WAVE64-NEXT: ;;#ASMSTART +; WAVE64-NEXT: ; clobber all VGPRs except v39 +; WAVE64-NEXT: ;;#ASMEND +; WAVE64-NEXT: buffer_load_dword v127, off, s[0:3], s33 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; WAVE64-NEXT: 
buffer_load_dword v124, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v78, off, 
s[0:3], s33 offset:100 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v40, off, s[0:3], s33 
offset:188 ; 4-byte Folded Reload +; WAVE64-NEXT: v_readlane_b32 s97, v39, 31 +; WAVE64-NEXT: v_readlane_b32 s96, v39, 30 +; WAVE64-NEXT: v_readlane_b32 s87, v39, 29 +; WAVE64-NEXT: v_readlane_b32 s86, v39, 28 +; WAVE64-NEXT: v_readlane_b32 s85, v39, 27 +; WAVE64-NEXT: v_readlane_b32 s84, v39, 26 +; WAVE64-NEXT: v_readlane_b32 s83, v39, 25 +; WAVE64-NEXT: v_readlane_b32 s82, v39, 24 +; WAVE64-NEXT: v_readlane_b32 s81, v39, 23 +; WAVE64-NEXT: v_readlane_b32 s80, v39, 22 +; WAVE64-NEXT: v_readlane_b32 s71, v39, 21 +; WAVE64-NEXT: v_readlane_b32 s70, v39, 20 +; WAVE64-NEXT: v_readlane_b32 s69, v39, 19 +; WAVE64-NEXT: v_readlane_b32 s68, v39, 18 +; WAVE64-NEXT: v_readlane_b32 s67, v39, 17 +; WAVE64-NEXT: v_readlane_b32 s66, v39, 16 +; WAVE64-NEXT: v_readlane_b32 s65, v39, 15 +; WAVE64-NEXT: v_readlane_b32 s64, v39, 14 +; WAVE64-NEXT: v_readlane_b32 s55, v39, 13 +; WAVE64-NEXT: v_readlane_b32 s54, v39, 12 +; WAVE64-NEXT: v_readlane_b32 s53, v39, 11 +; WAVE64-NEXT: v_readlane_b32 s52, v39, 10 +; WAVE64-NEXT: v_readlane_b32 s51, v39, 9 +; WAVE64-NEXT: v_readlane_b32 s50, v39, 8 +; WAVE64-NEXT: v_readlane_b32 s49, v39, 7 +; WAVE64-NEXT: v_readlane_b32 s48, v39, 6 +; WAVE64-NEXT: v_readlane_b32 s39, v39, 5 +; WAVE64-NEXT: v_readlane_b32 s38, v39, 4 +; WAVE64-NEXT: v_readlane_b32 s37, v39, 3 +; WAVE64-NEXT: v_readlane_b32 s36, v39, 2 +; WAVE64-NEXT: v_readlane_b32 s35, v39, 1 +; WAVE64-NEXT: v_readlane_b32 s34, v39, 0 +; WAVE64-NEXT: s_mov_b32 s32, s33 +; WAVE64-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; WAVE64-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; WAVE64-NEXT: s_mov_b64 exec, s[4:5] +; WAVE64-NEXT: .cfi_def_cfa_register 64 +; WAVE64-NEXT: s_mov_b32 s33, s40 +; WAVE64-NEXT: s_waitcnt vmcnt(0) +; WAVE64-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: callee_need_to_spill_fp_exec_to_memory: +; WAVE32: .Lfunc_begin5: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: 
.cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: .cfi_undefined 1536 +; WAVE32-NEXT: .cfi_undefined 1537 +; WAVE32-NEXT: .cfi_undefined 1538 +; WAVE32-NEXT: .cfi_undefined 1539 +; WAVE32-NEXT: .cfi_undefined 1540 +; WAVE32-NEXT: .cfi_undefined 1541 +; WAVE32-NEXT: .cfi_undefined 1542 +; WAVE32-NEXT: .cfi_undefined 1543 +; WAVE32-NEXT: .cfi_undefined 1544 +; WAVE32-NEXT: .cfi_undefined 1545 +; WAVE32-NEXT: .cfi_undefined 1546 +; WAVE32-NEXT: .cfi_undefined 1547 +; WAVE32-NEXT: .cfi_undefined 1548 +; WAVE32-NEXT: .cfi_undefined 1549 +; WAVE32-NEXT: .cfi_undefined 1550 +; WAVE32-NEXT: .cfi_undefined 1551 +; WAVE32-NEXT: .cfi_undefined 1552 +; WAVE32-NEXT: .cfi_undefined 1553 +; WAVE32-NEXT: .cfi_undefined 1554 +; WAVE32-NEXT: .cfi_undefined 1555 +; WAVE32-NEXT: .cfi_undefined 1556 +; WAVE32-NEXT: .cfi_undefined 1557 +; WAVE32-NEXT: .cfi_undefined 1558 +; WAVE32-NEXT: .cfi_undefined 1559 +; WAVE32-NEXT: .cfi_undefined 1560 +; WAVE32-NEXT: .cfi_undefined 1561 +; WAVE32-NEXT: .cfi_undefined 1562 +; WAVE32-NEXT: .cfi_undefined 1563 +; WAVE32-NEXT: .cfi_undefined 1564 +; WAVE32-NEXT: .cfi_undefined 1565 +; WAVE32-NEXT: .cfi_undefined 1566 +; WAVE32-NEXT: .cfi_undefined 1567 +; WAVE32-NEXT: .cfi_undefined 1568 +; WAVE32-NEXT: .cfi_undefined 1569 +; WAVE32-NEXT: .cfi_undefined 1570 +; WAVE32-NEXT: .cfi_undefined 1571 +; WAVE32-NEXT: .cfi_undefined 1572 +; WAVE32-NEXT: .cfi_undefined 1573 +; WAVE32-NEXT: .cfi_undefined 1574 +; WAVE32-NEXT: .cfi_undefined 1575 +; WAVE32-NEXT: .cfi_undefined 1584 +; WAVE32-NEXT: .cfi_undefined 1585 +; WAVE32-NEXT: .cfi_undefined 1586 +; WAVE32-NEXT: .cfi_undefined 1587 +; WAVE32-NEXT: .cfi_undefined 1588 +; WAVE32-NEXT: .cfi_undefined 1589 +; WAVE32-NEXT: .cfi_undefined 1590 +; WAVE32-NEXT: .cfi_undefined 1591 +; WAVE32-NEXT: .cfi_undefined 1600 +; WAVE32-NEXT: .cfi_undefined 1601 +; WAVE32-NEXT: .cfi_undefined 1602 +; WAVE32-NEXT: .cfi_undefined 1603 +; WAVE32-NEXT: .cfi_undefined 1604 +; WAVE32-NEXT: .cfi_undefined 1605 +; 
WAVE32-NEXT: .cfi_undefined 1606 +; WAVE32-NEXT: .cfi_undefined 1607 +; WAVE32-NEXT: .cfi_undefined 1616 +; WAVE32-NEXT: .cfi_undefined 1617 +; WAVE32-NEXT: .cfi_undefined 1618 +; WAVE32-NEXT: .cfi_undefined 1619 +; WAVE32-NEXT: .cfi_undefined 1620 +; WAVE32-NEXT: .cfi_undefined 1621 +; WAVE32-NEXT: .cfi_undefined 1622 +; WAVE32-NEXT: .cfi_undefined 1623 +; WAVE32-NEXT: .cfi_undefined 1632 +; WAVE32-NEXT: .cfi_undefined 1633 +; WAVE32-NEXT: .cfi_undefined 1634 +; WAVE32-NEXT: .cfi_undefined 1635 +; WAVE32-NEXT: .cfi_undefined 1636 +; WAVE32-NEXT: .cfi_undefined 1637 +; WAVE32-NEXT: .cfi_undefined 1638 +; WAVE32-NEXT: .cfi_undefined 1639 +; WAVE32-NEXT: .cfi_undefined 1648 +; WAVE32-NEXT: .cfi_undefined 1649 +; WAVE32-NEXT: .cfi_undefined 1650 +; WAVE32-NEXT: .cfi_undefined 1651 +; WAVE32-NEXT: .cfi_undefined 1652 +; WAVE32-NEXT: .cfi_undefined 1653 +; WAVE32-NEXT: .cfi_undefined 1654 +; WAVE32-NEXT: .cfi_undefined 1655 +; WAVE32-NEXT: .cfi_undefined 1664 +; WAVE32-NEXT: .cfi_undefined 1665 +; WAVE32-NEXT: .cfi_undefined 36 +; WAVE32-NEXT: .cfi_undefined 37 +; WAVE32-NEXT: .cfi_undefined 38 +; WAVE32-NEXT: .cfi_undefined 39 +; WAVE32-NEXT: .cfi_undefined 40 +; WAVE32-NEXT: .cfi_undefined 41 +; WAVE32-NEXT: .cfi_undefined 42 +; WAVE32-NEXT: .cfi_undefined 43 +; WAVE32-NEXT: .cfi_undefined 44 +; WAVE32-NEXT: .cfi_undefined 45 +; WAVE32-NEXT: .cfi_undefined 46 +; WAVE32-NEXT: .cfi_undefined 47 +; WAVE32-NEXT: .cfi_undefined 48 +; WAVE32-NEXT: .cfi_undefined 49 +; WAVE32-NEXT: .cfi_undefined 50 +; WAVE32-NEXT: .cfi_undefined 51 +; WAVE32-NEXT: .cfi_undefined 52 +; WAVE32-NEXT: .cfi_undefined 53 +; WAVE32-NEXT: .cfi_undefined 54 +; WAVE32-NEXT: .cfi_undefined 55 +; WAVE32-NEXT: .cfi_undefined 56 +; WAVE32-NEXT: .cfi_undefined 57 +; WAVE32-NEXT: .cfi_undefined 58 +; WAVE32-NEXT: .cfi_undefined 59 +; WAVE32-NEXT: .cfi_undefined 60 +; WAVE32-NEXT: .cfi_undefined 61 +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_mov_b32 s40, s33 +; WAVE32-NEXT: 
.cfi_register 65, 72 +; WAVE32-NEXT: s_mov_b32 s33, s32 +; WAVE32-NEXT: s_xor_saveexec_b32 s4, -1 +; WAVE32-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_offset 1575, 6144 +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s4 +; WAVE32-NEXT: v_mov_b32_e32 v0, exec_lo +; WAVE32-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_offset 1, 6272 +; WAVE32-NEXT: .cfi_def_cfa_register 65 +; WAVE32-NEXT: s_addk_i32 s32, 0x1980 +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1576, 32, 1, 32, 6016 +; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1577, 32, 1, 32, 5888 +; WAVE32-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1578, 32, 1, 32, 5760 +; WAVE32-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1579, 32, 1, 32, 5632 +; WAVE32-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1580, 32, 1, 32, 5504 +; WAVE32-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1581, 32, 1, 32, 5376 +; WAVE32-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1582, 32, 1, 32, 5248 +; WAVE32-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1583, 32, 1, 32, 5120 +; WAVE32-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1592, 32, 1, 32, 4992 +; WAVE32-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:152 ; 4-byte 
Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1593, 32, 1, 32, 4864 +; WAVE32-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1594, 32, 1, 32, 4736 +; WAVE32-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1595, 32, 1, 32, 4608 +; WAVE32-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1596, 32, 1, 32, 4480 +; WAVE32-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1597, 32, 1, 32, 4352 +; WAVE32-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1598, 32, 1, 32, 4224 +; WAVE32-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1599, 32, 1, 32, 4096 +; WAVE32-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1608, 32, 1, 32, 3968 +; WAVE32-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1609, 32, 1, 32, 3840 +; WAVE32-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1610, 32, 1, 32, 3712 +; WAVE32-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1611, 32, 1, 32, 3584 +; WAVE32-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1612, 32, 1, 32, 3456 +; WAVE32-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1613, 32, 1, 32, 3328 +; WAVE32-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; 
WAVE32-NEXT: .cfi_llvm_vector_offset 1614, 32, 1, 32, 3200 +; WAVE32-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1615, 32, 1, 32, 3072 +; WAVE32-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1624, 32, 1, 32, 2944 +; WAVE32-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1625, 32, 1, 32, 2816 +; WAVE32-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1626, 32, 1, 32, 2688 +; WAVE32-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1627, 32, 1, 32, 2560 +; WAVE32-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1628, 32, 1, 32, 2432 +; WAVE32-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1629, 32, 1, 32, 2304 +; WAVE32-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1630, 32, 1, 32, 2176 +; WAVE32-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1631, 32, 1, 32, 2048 +; WAVE32-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1640, 32, 1, 32, 1920 +; WAVE32-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1641, 32, 1, 32, 1792 +; WAVE32-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1642, 32, 1, 32, 1664 +; WAVE32-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; WAVE32-NEXT: 
.cfi_llvm_vector_offset 1643, 32, 1, 32, 1536 +; WAVE32-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1644, 32, 1, 32, 1408 +; WAVE32-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1645, 32, 1, 32, 1280 +; WAVE32-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1646, 32, 1, 32, 1152 +; WAVE32-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1647, 32, 1, 32, 1024 +; WAVE32-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1656, 32, 1, 32, 896 +; WAVE32-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1657, 32, 1, 32, 768 +; WAVE32-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1658, 32, 1, 32, 640 +; WAVE32-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1659, 32, 1, 32, 512 +; WAVE32-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1660, 32, 1, 32, 384 +; WAVE32-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1661, 32, 1, 32, 256 +; WAVE32-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1662, 32, 1, 32, 128 +; WAVE32-NEXT: buffer_store_dword v127, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1663, 32, 1, 32, 0 +; WAVE32-NEXT: v_writelane_b32 v39, s34, 0 +; WAVE32-NEXT: .cfi_llvm_vector_registers 66, 1575, 0, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s35, 1 +; 
WAVE32-NEXT: .cfi_llvm_vector_registers 67, 1575, 1, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s36, 2 +; WAVE32-NEXT: .cfi_llvm_vector_registers 68, 1575, 2, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s37, 3 +; WAVE32-NEXT: .cfi_llvm_vector_registers 69, 1575, 3, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s38, 4 +; WAVE32-NEXT: .cfi_llvm_vector_registers 70, 1575, 4, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s39, 5 +; WAVE32-NEXT: .cfi_llvm_vector_registers 71, 1575, 5, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s48, 6 +; WAVE32-NEXT: .cfi_llvm_vector_registers 80, 1575, 6, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s49, 7 +; WAVE32-NEXT: .cfi_llvm_vector_registers 81, 1575, 7, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s50, 8 +; WAVE32-NEXT: .cfi_llvm_vector_registers 82, 1575, 8, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s51, 9 +; WAVE32-NEXT: .cfi_llvm_vector_registers 83, 1575, 9, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s52, 10 +; WAVE32-NEXT: .cfi_llvm_vector_registers 84, 1575, 10, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s53, 11 +; WAVE32-NEXT: .cfi_llvm_vector_registers 85, 1575, 11, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s54, 12 +; WAVE32-NEXT: .cfi_llvm_vector_registers 86, 1575, 12, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s55, 13 +; WAVE32-NEXT: .cfi_llvm_vector_registers 87, 1575, 13, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s64, 14 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1088, 1575, 14, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s65, 15 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1089, 1575, 15, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s66, 16 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1090, 1575, 16, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s67, 17 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1091, 1575, 17, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s68, 18 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1092, 1575, 18, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s69, 19 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1093, 1575, 19, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s70, 20 +; 
WAVE32-NEXT: .cfi_llvm_vector_registers 1094, 1575, 20, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s71, 21 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1095, 1575, 21, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s80, 22 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1104, 1575, 22, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s81, 23 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1105, 1575, 23, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s82, 24 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1106, 1575, 24, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s83, 25 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1107, 1575, 25, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s84, 26 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1108, 1575, 26, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s85, 27 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1109, 1575, 27, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s86, 28 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1110, 1575, 28, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s87, 29 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1111, 1575, 29, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s96, 30 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1120, 1575, 30, 32 +; WAVE32-NEXT: v_writelane_b32 v39, s97, 31 +; WAVE32-NEXT: .cfi_llvm_vector_registers 1121, 1575, 31, 32 +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber nonpreserved and 32 CSR SGPRs +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber all VGPRs except v39 +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_clause 0x2f +; WAVE32-NEXT: buffer_load_dword v127, off, s[0:3], s33 +; WAVE32-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:4 +; WAVE32-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:8 +; WAVE32-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:12 +; WAVE32-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:16 +; WAVE32-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:20 +; WAVE32-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:24 +; WAVE32-NEXT: buffer_load_dword v120, off, s[0:3], s33 
offset:28 +; WAVE32-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:32 +; WAVE32-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:36 +; WAVE32-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:40 +; WAVE32-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:44 +; WAVE32-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:48 +; WAVE32-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:52 +; WAVE32-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:56 +; WAVE32-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:60 +; WAVE32-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:64 +; WAVE32-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:68 +; WAVE32-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:72 +; WAVE32-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:76 +; WAVE32-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:80 +; WAVE32-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:84 +; WAVE32-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:88 +; WAVE32-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:92 +; WAVE32-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:96 +; WAVE32-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:100 +; WAVE32-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:104 +; WAVE32-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:108 +; WAVE32-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:112 +; WAVE32-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:116 +; WAVE32-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:120 +; WAVE32-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:124 +; WAVE32-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:128 +; WAVE32-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:132 +; WAVE32-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:136 +; WAVE32-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:140 +; WAVE32-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:144 +; WAVE32-NEXT: buffer_load_dword v58, off, s[0:3], s33 
offset:148 +; WAVE32-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:152 +; WAVE32-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:156 +; WAVE32-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:160 +; WAVE32-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:164 +; WAVE32-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:168 +; WAVE32-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:172 +; WAVE32-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:176 +; WAVE32-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:180 +; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:184 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:188 +; WAVE32-NEXT: v_readlane_b32 s97, v39, 31 +; WAVE32-NEXT: v_readlane_b32 s96, v39, 30 +; WAVE32-NEXT: v_readlane_b32 s87, v39, 29 +; WAVE32-NEXT: v_readlane_b32 s86, v39, 28 +; WAVE32-NEXT: v_readlane_b32 s85, v39, 27 +; WAVE32-NEXT: v_readlane_b32 s84, v39, 26 +; WAVE32-NEXT: v_readlane_b32 s83, v39, 25 +; WAVE32-NEXT: v_readlane_b32 s82, v39, 24 +; WAVE32-NEXT: v_readlane_b32 s81, v39, 23 +; WAVE32-NEXT: v_readlane_b32 s80, v39, 22 +; WAVE32-NEXT: v_readlane_b32 s71, v39, 21 +; WAVE32-NEXT: v_readlane_b32 s70, v39, 20 +; WAVE32-NEXT: v_readlane_b32 s69, v39, 19 +; WAVE32-NEXT: v_readlane_b32 s68, v39, 18 +; WAVE32-NEXT: v_readlane_b32 s67, v39, 17 +; WAVE32-NEXT: v_readlane_b32 s66, v39, 16 +; WAVE32-NEXT: v_readlane_b32 s65, v39, 15 +; WAVE32-NEXT: v_readlane_b32 s64, v39, 14 +; WAVE32-NEXT: v_readlane_b32 s55, v39, 13 +; WAVE32-NEXT: v_readlane_b32 s54, v39, 12 +; WAVE32-NEXT: v_readlane_b32 s53, v39, 11 +; WAVE32-NEXT: v_readlane_b32 s52, v39, 10 +; WAVE32-NEXT: v_readlane_b32 s51, v39, 9 +; WAVE32-NEXT: v_readlane_b32 s50, v39, 8 +; WAVE32-NEXT: v_readlane_b32 s49, v39, 7 +; WAVE32-NEXT: v_readlane_b32 s48, v39, 6 +; WAVE32-NEXT: v_readlane_b32 s39, v39, 5 +; WAVE32-NEXT: v_readlane_b32 s38, v39, 4 +; WAVE32-NEXT: v_readlane_b32 s37, v39, 3 +; WAVE32-NEXT: v_readlane_b32 s36, v39, 2 +; 
WAVE32-NEXT: v_readlane_b32 s35, v39, 1 +; WAVE32-NEXT: v_readlane_b32 s34, v39, 0 +; WAVE32-NEXT: s_mov_b32 s32, s33 +; WAVE32-NEXT: s_xor_saveexec_b32 s4, -1 +; WAVE32-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s4 +; WAVE32-NEXT: .cfi_def_cfa_register 64 +; WAVE32-NEXT: s_mov_b32 s33, s40 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] + call void asm sideeffect "; clobber nonpreserved and 32 CSR SGPRs", + "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9} + ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19} + ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29} + ,~{s34},~{s35},~{s36},~{s37},~{s38},~{s39} + ,~{s48},~{s49},~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s64},~{s65} + ,~{s66},~{s67},~{s68},~{s69},~{s70},~{s71},~{s80},~{s81},~{s82},~{s83} + ,~{s84},~{s85},~{s86},~{s87},~{s96},~{s97} + ,~{vcc}"() + + call void asm sideeffect "; clobber all VGPRs except v39", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} + ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} + ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} + ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38} + ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49} + ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59} + ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69} + ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79} + ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89} + ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99} + ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109} + ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119} + 
,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}"() + ret void +} + +define internal void @caller_needs_to_spill_pc_to_memory() #3 { +; WAVE64-LABEL: caller_needs_to_spill_pc_to_memory: +; WAVE64: .Lfunc_begin6: +; WAVE64-NEXT: .cfi_startproc +; WAVE64-NEXT: ; %bb.0: +; WAVE64-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE64-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE64-NEXT: .cfi_undefined 2560 +; WAVE64-NEXT: .cfi_undefined 2561 +; WAVE64-NEXT: .cfi_undefined 2562 +; WAVE64-NEXT: .cfi_undefined 2563 +; WAVE64-NEXT: .cfi_undefined 2564 +; WAVE64-NEXT: .cfi_undefined 2565 +; WAVE64-NEXT: .cfi_undefined 2566 +; WAVE64-NEXT: .cfi_undefined 2567 +; WAVE64-NEXT: .cfi_undefined 2568 +; WAVE64-NEXT: .cfi_undefined 2569 +; WAVE64-NEXT: .cfi_undefined 2570 +; WAVE64-NEXT: .cfi_undefined 2571 +; WAVE64-NEXT: .cfi_undefined 2572 +; WAVE64-NEXT: .cfi_undefined 2573 +; WAVE64-NEXT: .cfi_undefined 2574 +; WAVE64-NEXT: .cfi_undefined 2575 +; WAVE64-NEXT: .cfi_undefined 2576 +; WAVE64-NEXT: .cfi_undefined 2577 +; WAVE64-NEXT: .cfi_undefined 2578 +; WAVE64-NEXT: .cfi_undefined 2579 +; WAVE64-NEXT: .cfi_undefined 2580 +; WAVE64-NEXT: .cfi_undefined 2581 +; WAVE64-NEXT: .cfi_undefined 2582 +; WAVE64-NEXT: .cfi_undefined 2583 +; WAVE64-NEXT: .cfi_undefined 2584 +; WAVE64-NEXT: .cfi_undefined 2585 +; WAVE64-NEXT: .cfi_undefined 2586 +; WAVE64-NEXT: .cfi_undefined 2587 +; WAVE64-NEXT: .cfi_undefined 2588 +; WAVE64-NEXT: .cfi_undefined 2589 +; WAVE64-NEXT: .cfi_undefined 2590 +; WAVE64-NEXT: .cfi_undefined 2591 +; WAVE64-NEXT: .cfi_undefined 2592 +; WAVE64-NEXT: .cfi_undefined 2593 +; WAVE64-NEXT: .cfi_undefined 2594 +; WAVE64-NEXT: .cfi_undefined 2595 +; WAVE64-NEXT: .cfi_undefined 2596 +; WAVE64-NEXT: .cfi_undefined 2597 +; WAVE64-NEXT: .cfi_undefined 2598 +; WAVE64-NEXT: .cfi_undefined 2599 +; WAVE64-NEXT: .cfi_undefined 2608 +; WAVE64-NEXT: .cfi_undefined 2609 +; WAVE64-NEXT: .cfi_undefined 2610 +; WAVE64-NEXT: .cfi_undefined 2611 +; 
WAVE64-NEXT: .cfi_undefined 2612 +; WAVE64-NEXT: .cfi_undefined 2613 +; WAVE64-NEXT: .cfi_undefined 2614 +; WAVE64-NEXT: .cfi_undefined 2615 +; WAVE64-NEXT: .cfi_undefined 2624 +; WAVE64-NEXT: .cfi_undefined 2625 +; WAVE64-NEXT: .cfi_undefined 2626 +; WAVE64-NEXT: .cfi_undefined 2627 +; WAVE64-NEXT: .cfi_undefined 2628 +; WAVE64-NEXT: .cfi_undefined 2629 +; WAVE64-NEXT: .cfi_undefined 2630 +; WAVE64-NEXT: .cfi_undefined 2631 +; WAVE64-NEXT: .cfi_undefined 2640 +; WAVE64-NEXT: .cfi_undefined 2641 +; WAVE64-NEXT: .cfi_undefined 2642 +; WAVE64-NEXT: .cfi_undefined 2643 +; WAVE64-NEXT: .cfi_undefined 2644 +; WAVE64-NEXT: .cfi_undefined 2645 +; WAVE64-NEXT: .cfi_undefined 2646 +; WAVE64-NEXT: .cfi_undefined 2647 +; WAVE64-NEXT: .cfi_undefined 2656 +; WAVE64-NEXT: .cfi_undefined 2657 +; WAVE64-NEXT: .cfi_undefined 2658 +; WAVE64-NEXT: .cfi_undefined 2659 +; WAVE64-NEXT: .cfi_undefined 2660 +; WAVE64-NEXT: .cfi_undefined 2661 +; WAVE64-NEXT: .cfi_undefined 2662 +; WAVE64-NEXT: .cfi_undefined 2663 +; WAVE64-NEXT: .cfi_undefined 2672 +; WAVE64-NEXT: .cfi_undefined 2673 +; WAVE64-NEXT: .cfi_undefined 2674 +; WAVE64-NEXT: .cfi_undefined 2675 +; WAVE64-NEXT: .cfi_undefined 2676 +; WAVE64-NEXT: .cfi_undefined 2677 +; WAVE64-NEXT: .cfi_undefined 2678 +; WAVE64-NEXT: .cfi_undefined 2679 +; WAVE64-NEXT: .cfi_undefined 2688 +; WAVE64-NEXT: .cfi_undefined 2689 +; WAVE64-NEXT: .cfi_undefined 2690 +; WAVE64-NEXT: .cfi_undefined 2691 +; WAVE64-NEXT: .cfi_undefined 2692 +; WAVE64-NEXT: .cfi_undefined 2693 +; WAVE64-NEXT: .cfi_undefined 2694 +; WAVE64-NEXT: .cfi_undefined 2695 +; WAVE64-NEXT: .cfi_undefined 2704 +; WAVE64-NEXT: .cfi_undefined 2705 +; WAVE64-NEXT: .cfi_undefined 2706 +; WAVE64-NEXT: .cfi_undefined 2707 +; WAVE64-NEXT: .cfi_undefined 2708 +; WAVE64-NEXT: .cfi_undefined 2709 +; WAVE64-NEXT: .cfi_undefined 2710 +; WAVE64-NEXT: .cfi_undefined 2711 +; WAVE64-NEXT: .cfi_undefined 2720 +; WAVE64-NEXT: .cfi_undefined 2721 +; WAVE64-NEXT: .cfi_undefined 2722 +; WAVE64-NEXT: 
.cfi_undefined 2723 +; WAVE64-NEXT: .cfi_undefined 2724 +; WAVE64-NEXT: .cfi_undefined 2725 +; WAVE64-NEXT: .cfi_undefined 2726 +; WAVE64-NEXT: .cfi_undefined 2727 +; WAVE64-NEXT: .cfi_undefined 2736 +; WAVE64-NEXT: .cfi_undefined 2737 +; WAVE64-NEXT: .cfi_undefined 2738 +; WAVE64-NEXT: .cfi_undefined 2739 +; WAVE64-NEXT: .cfi_undefined 2740 +; WAVE64-NEXT: .cfi_undefined 2741 +; WAVE64-NEXT: .cfi_undefined 2742 +; WAVE64-NEXT: .cfi_undefined 2743 +; WAVE64-NEXT: .cfi_undefined 2752 +; WAVE64-NEXT: .cfi_undefined 2753 +; WAVE64-NEXT: .cfi_undefined 2754 +; WAVE64-NEXT: .cfi_undefined 2755 +; WAVE64-NEXT: .cfi_undefined 2756 +; WAVE64-NEXT: .cfi_undefined 2757 +; WAVE64-NEXT: .cfi_undefined 2758 +; WAVE64-NEXT: .cfi_undefined 2759 +; WAVE64-NEXT: .cfi_undefined 2768 +; WAVE64-NEXT: .cfi_undefined 2769 +; WAVE64-NEXT: .cfi_undefined 2770 +; WAVE64-NEXT: .cfi_undefined 2771 +; WAVE64-NEXT: .cfi_undefined 2772 +; WAVE64-NEXT: .cfi_undefined 2773 +; WAVE64-NEXT: .cfi_undefined 2774 +; WAVE64-NEXT: .cfi_undefined 2775 +; WAVE64-NEXT: .cfi_undefined 2784 +; WAVE64-NEXT: .cfi_undefined 2785 +; WAVE64-NEXT: .cfi_undefined 2786 +; WAVE64-NEXT: .cfi_undefined 2787 +; WAVE64-NEXT: .cfi_undefined 2788 +; WAVE64-NEXT: .cfi_undefined 2789 +; WAVE64-NEXT: .cfi_undefined 2790 +; WAVE64-NEXT: .cfi_undefined 2791 +; WAVE64-NEXT: .cfi_undefined 2800 +; WAVE64-NEXT: .cfi_undefined 2801 +; WAVE64-NEXT: .cfi_undefined 2802 +; WAVE64-NEXT: .cfi_undefined 2803 +; WAVE64-NEXT: .cfi_undefined 2804 +; WAVE64-NEXT: .cfi_undefined 2805 +; WAVE64-NEXT: .cfi_undefined 2806 +; WAVE64-NEXT: .cfi_undefined 2807 +; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE64-NEXT: v_mov_b32_e32 v0, exec_lo +; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE64-NEXT: v_mov_b32_e32 v0, exec_hi +; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_offset 17, 0 +; WAVE64-NEXT: ;;#ASMSTART +; WAVE64-NEXT: ; clobber 
all VGPRs +; WAVE64-NEXT: ;;#ASMEND +; WAVE64-NEXT: s_waitcnt vmcnt(0) +; WAVE64-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: caller_needs_to_spill_pc_to_memory: +; WAVE32: .Lfunc_begin6: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: .cfi_undefined 1536 +; WAVE32-NEXT: .cfi_undefined 1537 +; WAVE32-NEXT: .cfi_undefined 1538 +; WAVE32-NEXT: .cfi_undefined 1539 +; WAVE32-NEXT: .cfi_undefined 1540 +; WAVE32-NEXT: .cfi_undefined 1541 +; WAVE32-NEXT: .cfi_undefined 1542 +; WAVE32-NEXT: .cfi_undefined 1543 +; WAVE32-NEXT: .cfi_undefined 1544 +; WAVE32-NEXT: .cfi_undefined 1545 +; WAVE32-NEXT: .cfi_undefined 1546 +; WAVE32-NEXT: .cfi_undefined 1547 +; WAVE32-NEXT: .cfi_undefined 1548 +; WAVE32-NEXT: .cfi_undefined 1549 +; WAVE32-NEXT: .cfi_undefined 1550 +; WAVE32-NEXT: .cfi_undefined 1551 +; WAVE32-NEXT: .cfi_undefined 1552 +; WAVE32-NEXT: .cfi_undefined 1553 +; WAVE32-NEXT: .cfi_undefined 1554 +; WAVE32-NEXT: .cfi_undefined 1555 +; WAVE32-NEXT: .cfi_undefined 1556 +; WAVE32-NEXT: .cfi_undefined 1557 +; WAVE32-NEXT: .cfi_undefined 1558 +; WAVE32-NEXT: .cfi_undefined 1559 +; WAVE32-NEXT: .cfi_undefined 1560 +; WAVE32-NEXT: .cfi_undefined 1561 +; WAVE32-NEXT: .cfi_undefined 1562 +; WAVE32-NEXT: .cfi_undefined 1563 +; WAVE32-NEXT: .cfi_undefined 1564 +; WAVE32-NEXT: .cfi_undefined 1565 +; WAVE32-NEXT: .cfi_undefined 1566 +; WAVE32-NEXT: .cfi_undefined 1567 +; WAVE32-NEXT: .cfi_undefined 1568 +; WAVE32-NEXT: .cfi_undefined 1569 +; WAVE32-NEXT: .cfi_undefined 1570 +; WAVE32-NEXT: .cfi_undefined 1571 +; WAVE32-NEXT: .cfi_undefined 1572 +; WAVE32-NEXT: .cfi_undefined 1573 +; WAVE32-NEXT: .cfi_undefined 1574 +; WAVE32-NEXT: .cfi_undefined 1575 +; WAVE32-NEXT: .cfi_undefined 1584 +; WAVE32-NEXT: .cfi_undefined 1585 +; WAVE32-NEXT: .cfi_undefined 1586 +; WAVE32-NEXT: .cfi_undefined 1587 +; WAVE32-NEXT: .cfi_undefined 1588 +; WAVE32-NEXT: 
.cfi_undefined 1589 +; WAVE32-NEXT: .cfi_undefined 1590 +; WAVE32-NEXT: .cfi_undefined 1591 +; WAVE32-NEXT: .cfi_undefined 1600 +; WAVE32-NEXT: .cfi_undefined 1601 +; WAVE32-NEXT: .cfi_undefined 1602 +; WAVE32-NEXT: .cfi_undefined 1603 +; WAVE32-NEXT: .cfi_undefined 1604 +; WAVE32-NEXT: .cfi_undefined 1605 +; WAVE32-NEXT: .cfi_undefined 1606 +; WAVE32-NEXT: .cfi_undefined 1607 +; WAVE32-NEXT: .cfi_undefined 1616 +; WAVE32-NEXT: .cfi_undefined 1617 +; WAVE32-NEXT: .cfi_undefined 1618 +; WAVE32-NEXT: .cfi_undefined 1619 +; WAVE32-NEXT: .cfi_undefined 1620 +; WAVE32-NEXT: .cfi_undefined 1621 +; WAVE32-NEXT: .cfi_undefined 1622 +; WAVE32-NEXT: .cfi_undefined 1623 +; WAVE32-NEXT: .cfi_undefined 1632 +; WAVE32-NEXT: .cfi_undefined 1633 +; WAVE32-NEXT: .cfi_undefined 1634 +; WAVE32-NEXT: .cfi_undefined 1635 +; WAVE32-NEXT: .cfi_undefined 1636 +; WAVE32-NEXT: .cfi_undefined 1637 +; WAVE32-NEXT: .cfi_undefined 1638 +; WAVE32-NEXT: .cfi_undefined 1639 +; WAVE32-NEXT: .cfi_undefined 1648 +; WAVE32-NEXT: .cfi_undefined 1649 +; WAVE32-NEXT: .cfi_undefined 1650 +; WAVE32-NEXT: .cfi_undefined 1651 +; WAVE32-NEXT: .cfi_undefined 1652 +; WAVE32-NEXT: .cfi_undefined 1653 +; WAVE32-NEXT: .cfi_undefined 1654 +; WAVE32-NEXT: .cfi_undefined 1655 +; WAVE32-NEXT: .cfi_undefined 1664 +; WAVE32-NEXT: .cfi_undefined 1665 +; WAVE32-NEXT: .cfi_undefined 1666 +; WAVE32-NEXT: .cfi_undefined 1667 +; WAVE32-NEXT: .cfi_undefined 1668 +; WAVE32-NEXT: .cfi_undefined 1669 +; WAVE32-NEXT: .cfi_undefined 1670 +; WAVE32-NEXT: .cfi_undefined 1671 +; WAVE32-NEXT: .cfi_undefined 1680 +; WAVE32-NEXT: .cfi_undefined 1681 +; WAVE32-NEXT: .cfi_undefined 1682 +; WAVE32-NEXT: .cfi_undefined 1683 +; WAVE32-NEXT: .cfi_undefined 1684 +; WAVE32-NEXT: .cfi_undefined 1685 +; WAVE32-NEXT: .cfi_undefined 1686 +; WAVE32-NEXT: .cfi_undefined 1687 +; WAVE32-NEXT: .cfi_undefined 1696 +; WAVE32-NEXT: .cfi_undefined 1697 +; WAVE32-NEXT: .cfi_undefined 1698 +; WAVE32-NEXT: .cfi_undefined 1699 +; WAVE32-NEXT: .cfi_undefined 1700 
+; WAVE32-NEXT: .cfi_undefined 1701 +; WAVE32-NEXT: .cfi_undefined 1702 +; WAVE32-NEXT: .cfi_undefined 1703 +; WAVE32-NEXT: .cfi_undefined 1712 +; WAVE32-NEXT: .cfi_undefined 1713 +; WAVE32-NEXT: .cfi_undefined 1714 +; WAVE32-NEXT: .cfi_undefined 1715 +; WAVE32-NEXT: .cfi_undefined 1716 +; WAVE32-NEXT: .cfi_undefined 1717 +; WAVE32-NEXT: .cfi_undefined 1718 +; WAVE32-NEXT: .cfi_undefined 1719 +; WAVE32-NEXT: .cfi_undefined 1728 +; WAVE32-NEXT: .cfi_undefined 1729 +; WAVE32-NEXT: .cfi_undefined 1730 +; WAVE32-NEXT: .cfi_undefined 1731 +; WAVE32-NEXT: .cfi_undefined 1732 +; WAVE32-NEXT: .cfi_undefined 1733 +; WAVE32-NEXT: .cfi_undefined 1734 +; WAVE32-NEXT: .cfi_undefined 1735 +; WAVE32-NEXT: .cfi_undefined 1744 +; WAVE32-NEXT: .cfi_undefined 1745 +; WAVE32-NEXT: .cfi_undefined 1746 +; WAVE32-NEXT: .cfi_undefined 1747 +; WAVE32-NEXT: .cfi_undefined 1748 +; WAVE32-NEXT: .cfi_undefined 1749 +; WAVE32-NEXT: .cfi_undefined 1750 +; WAVE32-NEXT: .cfi_undefined 1751 +; WAVE32-NEXT: .cfi_undefined 1760 +; WAVE32-NEXT: .cfi_undefined 1761 +; WAVE32-NEXT: .cfi_undefined 1762 +; WAVE32-NEXT: .cfi_undefined 1763 +; WAVE32-NEXT: .cfi_undefined 1764 +; WAVE32-NEXT: .cfi_undefined 1765 +; WAVE32-NEXT: .cfi_undefined 1766 +; WAVE32-NEXT: .cfi_undefined 1767 +; WAVE32-NEXT: .cfi_undefined 1776 +; WAVE32-NEXT: .cfi_undefined 1777 +; WAVE32-NEXT: .cfi_undefined 1778 +; WAVE32-NEXT: .cfi_undefined 1779 +; WAVE32-NEXT: .cfi_undefined 1780 +; WAVE32-NEXT: .cfi_undefined 1781 +; WAVE32-NEXT: .cfi_undefined 1782 +; WAVE32-NEXT: .cfi_undefined 1783 +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: v_mov_b32_e32 v0, exec_lo +; WAVE32-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_offset 1, 0 +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber all VGPRs +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_setpc_b64 s[30:31] + call void asm sideeffect "; clobber all VGPRs", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} + 
,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} + ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} + ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39} + ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49} + ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59} + ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69} + ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79} + ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89} + ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99} + ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109} + ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119} + ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129} + ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139} + ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149} + ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159} + ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169} + ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179} + ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189} + ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199} + ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209} + ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219} + ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229} + ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239} + ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249} + ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #3 + ret void +} + +define void 
@need_to_spill_pc_to_mem() #3 { +; WAVE64-LABEL: need_to_spill_pc_to_mem: +; WAVE64: .Lfunc_begin7: +; WAVE64-NEXT: .cfi_startproc +; WAVE64-NEXT: ; %bb.0: +; WAVE64-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE64-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE64-NEXT: .cfi_undefined 2560 +; WAVE64-NEXT: .cfi_undefined 2561 +; WAVE64-NEXT: .cfi_undefined 2562 +; WAVE64-NEXT: .cfi_undefined 2563 +; WAVE64-NEXT: .cfi_undefined 2564 +; WAVE64-NEXT: .cfi_undefined 2565 +; WAVE64-NEXT: .cfi_undefined 2566 +; WAVE64-NEXT: .cfi_undefined 2567 +; WAVE64-NEXT: .cfi_undefined 2568 +; WAVE64-NEXT: .cfi_undefined 2569 +; WAVE64-NEXT: .cfi_undefined 2570 +; WAVE64-NEXT: .cfi_undefined 2571 +; WAVE64-NEXT: .cfi_undefined 2572 +; WAVE64-NEXT: .cfi_undefined 2573 +; WAVE64-NEXT: .cfi_undefined 2574 +; WAVE64-NEXT: .cfi_undefined 2575 +; WAVE64-NEXT: .cfi_undefined 2576 +; WAVE64-NEXT: .cfi_undefined 2577 +; WAVE64-NEXT: .cfi_undefined 2578 +; WAVE64-NEXT: .cfi_undefined 2579 +; WAVE64-NEXT: .cfi_undefined 2580 +; WAVE64-NEXT: .cfi_undefined 2581 +; WAVE64-NEXT: .cfi_undefined 2582 +; WAVE64-NEXT: .cfi_undefined 2583 +; WAVE64-NEXT: .cfi_undefined 2584 +; WAVE64-NEXT: .cfi_undefined 2585 +; WAVE64-NEXT: .cfi_undefined 2586 +; WAVE64-NEXT: .cfi_undefined 2587 +; WAVE64-NEXT: .cfi_undefined 2588 +; WAVE64-NEXT: .cfi_undefined 2589 +; WAVE64-NEXT: .cfi_undefined 2590 +; WAVE64-NEXT: .cfi_undefined 2591 +; WAVE64-NEXT: .cfi_undefined 2592 +; WAVE64-NEXT: .cfi_undefined 2593 +; WAVE64-NEXT: .cfi_undefined 2594 +; WAVE64-NEXT: .cfi_undefined 2595 +; WAVE64-NEXT: .cfi_undefined 2596 +; WAVE64-NEXT: .cfi_undefined 2597 +; WAVE64-NEXT: .cfi_undefined 2598 +; WAVE64-NEXT: .cfi_undefined 2599 +; WAVE64-NEXT: .cfi_undefined 2608 +; WAVE64-NEXT: .cfi_undefined 2609 +; WAVE64-NEXT: .cfi_undefined 2610 +; WAVE64-NEXT: .cfi_undefined 2611 +; WAVE64-NEXT: .cfi_undefined 2612 +; WAVE64-NEXT: .cfi_undefined 2613 +; WAVE64-NEXT: .cfi_undefined 2614 +; WAVE64-NEXT: .cfi_undefined 2615 +; 
WAVE64-NEXT: .cfi_undefined 2624 +; WAVE64-NEXT: .cfi_undefined 2625 +; WAVE64-NEXT: .cfi_undefined 2626 +; WAVE64-NEXT: .cfi_undefined 2627 +; WAVE64-NEXT: .cfi_undefined 2628 +; WAVE64-NEXT: .cfi_undefined 2629 +; WAVE64-NEXT: .cfi_undefined 2630 +; WAVE64-NEXT: .cfi_undefined 2631 +; WAVE64-NEXT: .cfi_undefined 2640 +; WAVE64-NEXT: .cfi_undefined 2641 +; WAVE64-NEXT: .cfi_undefined 2642 +; WAVE64-NEXT: .cfi_undefined 2643 +; WAVE64-NEXT: .cfi_undefined 2644 +; WAVE64-NEXT: .cfi_undefined 2645 +; WAVE64-NEXT: .cfi_undefined 2646 +; WAVE64-NEXT: .cfi_undefined 2647 +; WAVE64-NEXT: .cfi_undefined 2656 +; WAVE64-NEXT: .cfi_undefined 2657 +; WAVE64-NEXT: .cfi_undefined 2658 +; WAVE64-NEXT: .cfi_undefined 2659 +; WAVE64-NEXT: .cfi_undefined 2660 +; WAVE64-NEXT: .cfi_undefined 2661 +; WAVE64-NEXT: .cfi_undefined 2662 +; WAVE64-NEXT: .cfi_undefined 2663 +; WAVE64-NEXT: .cfi_undefined 2672 +; WAVE64-NEXT: .cfi_undefined 2673 +; WAVE64-NEXT: .cfi_undefined 2674 +; WAVE64-NEXT: .cfi_undefined 2675 +; WAVE64-NEXT: .cfi_undefined 2676 +; WAVE64-NEXT: .cfi_undefined 2677 +; WAVE64-NEXT: .cfi_undefined 2678 +; WAVE64-NEXT: .cfi_undefined 2679 +; WAVE64-NEXT: .cfi_undefined 2688 +; WAVE64-NEXT: .cfi_undefined 2689 +; WAVE64-NEXT: .cfi_undefined 2690 +; WAVE64-NEXT: .cfi_undefined 2691 +; WAVE64-NEXT: .cfi_undefined 2692 +; WAVE64-NEXT: .cfi_undefined 2693 +; WAVE64-NEXT: .cfi_undefined 2694 +; WAVE64-NEXT: .cfi_undefined 2695 +; WAVE64-NEXT: .cfi_undefined 2704 +; WAVE64-NEXT: .cfi_undefined 2705 +; WAVE64-NEXT: .cfi_undefined 2706 +; WAVE64-NEXT: .cfi_undefined 2707 +; WAVE64-NEXT: .cfi_undefined 2708 +; WAVE64-NEXT: .cfi_undefined 2709 +; WAVE64-NEXT: .cfi_undefined 2710 +; WAVE64-NEXT: .cfi_undefined 2711 +; WAVE64-NEXT: .cfi_undefined 2720 +; WAVE64-NEXT: .cfi_undefined 2721 +; WAVE64-NEXT: .cfi_undefined 2722 +; WAVE64-NEXT: .cfi_undefined 2723 +; WAVE64-NEXT: .cfi_undefined 2724 +; WAVE64-NEXT: .cfi_undefined 2725 +; WAVE64-NEXT: .cfi_undefined 2726 +; WAVE64-NEXT: 
.cfi_undefined 2727 +; WAVE64-NEXT: .cfi_undefined 2736 +; WAVE64-NEXT: .cfi_undefined 2737 +; WAVE64-NEXT: .cfi_undefined 2738 +; WAVE64-NEXT: .cfi_undefined 2739 +; WAVE64-NEXT: .cfi_undefined 2740 +; WAVE64-NEXT: .cfi_undefined 2741 +; WAVE64-NEXT: .cfi_undefined 2742 +; WAVE64-NEXT: .cfi_undefined 2743 +; WAVE64-NEXT: .cfi_undefined 2752 +; WAVE64-NEXT: .cfi_undefined 2753 +; WAVE64-NEXT: .cfi_undefined 2754 +; WAVE64-NEXT: .cfi_undefined 2755 +; WAVE64-NEXT: .cfi_undefined 2756 +; WAVE64-NEXT: .cfi_undefined 2757 +; WAVE64-NEXT: .cfi_undefined 2758 +; WAVE64-NEXT: .cfi_undefined 2759 +; WAVE64-NEXT: .cfi_undefined 2768 +; WAVE64-NEXT: .cfi_undefined 2769 +; WAVE64-NEXT: .cfi_undefined 2770 +; WAVE64-NEXT: .cfi_undefined 2771 +; WAVE64-NEXT: .cfi_undefined 2772 +; WAVE64-NEXT: .cfi_undefined 2773 +; WAVE64-NEXT: .cfi_undefined 2774 +; WAVE64-NEXT: .cfi_undefined 2775 +; WAVE64-NEXT: .cfi_undefined 2784 +; WAVE64-NEXT: .cfi_undefined 2785 +; WAVE64-NEXT: .cfi_undefined 2786 +; WAVE64-NEXT: .cfi_undefined 2787 +; WAVE64-NEXT: .cfi_undefined 2788 +; WAVE64-NEXT: .cfi_undefined 2789 +; WAVE64-NEXT: .cfi_undefined 2790 +; WAVE64-NEXT: .cfi_undefined 2791 +; WAVE64-NEXT: .cfi_undefined 2800 +; WAVE64-NEXT: .cfi_undefined 2801 +; WAVE64-NEXT: .cfi_undefined 2802 +; WAVE64-NEXT: .cfi_undefined 2803 +; WAVE64-NEXT: .cfi_undefined 2804 +; WAVE64-NEXT: .cfi_undefined 2805 +; WAVE64-NEXT: .cfi_undefined 2806 +; WAVE64-NEXT: .cfi_undefined 2807 +; WAVE64-NEXT: .cfi_undefined 48 +; WAVE64-NEXT: .cfi_undefined 49 +; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE64-NEXT: s_mov_b32 s18, s33 +; WAVE64-NEXT: .cfi_register 65, 50 +; WAVE64-NEXT: s_mov_b32 s33, s32 +; WAVE64-NEXT: v_mov_b32_e32 v0, exec_lo +; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 ; 4-byte Folded Spill +; WAVE64-NEXT: v_mov_b32_e32 v0, exec_hi +; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:460 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_offset 17, 29184 +; 
WAVE64-NEXT: .cfi_def_cfa_register 65 +; WAVE64-NEXT: s_addk_i32 s32, 0x7800 +; WAVE64-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 28416 +; WAVE64-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 28160 +; WAVE64-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2602, 32, 17, 64, 27904 +; WAVE64-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2603, 32, 17, 64, 27648 +; WAVE64-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2604, 32, 17, 64, 27392 +; WAVE64-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2605, 32, 17, 64, 27136 +; WAVE64-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2606, 32, 17, 64, 26880 +; WAVE64-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2607, 32, 17, 64, 26624 +; WAVE64-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2616, 32, 17, 64, 26368 +; WAVE64-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2617, 32, 17, 64, 26112 +; WAVE64-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2618, 32, 17, 64, 25856 +; WAVE64-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2619, 32, 17, 64, 25600 +; WAVE64-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 
4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2620, 32, 17, 64, 25344 +; WAVE64-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2621, 32, 17, 64, 25088 +; WAVE64-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2622, 32, 17, 64, 24832 +; WAVE64-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2623, 32, 17, 64, 24576 +; WAVE64-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2632, 32, 17, 64, 24320 +; WAVE64-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2633, 32, 17, 64, 24064 +; WAVE64-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2634, 32, 17, 64, 23808 +; WAVE64-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2635, 32, 17, 64, 23552 +; WAVE64-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2636, 32, 17, 64, 23296 +; WAVE64-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2637, 32, 17, 64, 23040 +; WAVE64-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2638, 32, 17, 64, 22784 +; WAVE64-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2639, 32, 17, 64, 22528 +; WAVE64-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2648, 32, 17, 64, 22272 +; WAVE64-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 
; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2649, 32, 17, 64, 22016 +; WAVE64-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2650, 32, 17, 64, 21760 +; WAVE64-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2651, 32, 17, 64, 21504 +; WAVE64-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2652, 32, 17, 64, 21248 +; WAVE64-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2653, 32, 17, 64, 20992 +; WAVE64-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2654, 32, 17, 64, 20736 +; WAVE64-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2655, 32, 17, 64, 20480 +; WAVE64-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2664, 32, 17, 64, 20224 +; WAVE64-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2665, 32, 17, 64, 19968 +; WAVE64-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2666, 32, 17, 64, 19712 +; WAVE64-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2667, 32, 17, 64, 19456 +; WAVE64-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2668, 32, 17, 64, 19200 +; WAVE64-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2669, 32, 17, 64, 18944 +; WAVE64-NEXT: buffer_store_dword v110, off, s[0:3], s33 
offset:292 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2670, 32, 17, 64, 18688 +; WAVE64-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2671, 32, 17, 64, 18432 +; WAVE64-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2680, 32, 17, 64, 18176 +; WAVE64-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2681, 32, 17, 64, 17920 +; WAVE64-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2682, 32, 17, 64, 17664 +; WAVE64-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2683, 32, 17, 64, 17408 +; WAVE64-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2684, 32, 17, 64, 17152 +; WAVE64-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2685, 32, 17, 64, 16896 +; WAVE64-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2686, 32, 17, 64, 16640 +; WAVE64-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2687, 32, 17, 64, 16384 +; WAVE64-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2696, 32, 17, 64, 16128 +; WAVE64-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2697, 32, 17, 64, 15872 +; WAVE64-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2698, 32, 17, 64, 15616 +; WAVE64-NEXT: buffer_store_dword v139, 
off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2699, 32, 17, 64, 15360 +; WAVE64-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2700, 32, 17, 64, 15104 +; WAVE64-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2701, 32, 17, 64, 14848 +; WAVE64-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2702, 32, 17, 64, 14592 +; WAVE64-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2703, 32, 17, 64, 14336 +; WAVE64-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2712, 32, 17, 64, 14080 +; WAVE64-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2713, 32, 17, 64, 13824 +; WAVE64-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2714, 32, 17, 64, 13568 +; WAVE64-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2715, 32, 17, 64, 13312 +; WAVE64-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2716, 32, 17, 64, 13056 +; WAVE64-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2717, 32, 17, 64, 12800 +; WAVE64-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2718, 32, 17, 64, 12544 +; WAVE64-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2719, 32, 17, 64, 12288 +; WAVE64-NEXT: 
buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2728, 32, 17, 64, 12032 +; WAVE64-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2729, 32, 17, 64, 11776 +; WAVE64-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2730, 32, 17, 64, 11520 +; WAVE64-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2731, 32, 17, 64, 11264 +; WAVE64-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2732, 32, 17, 64, 11008 +; WAVE64-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2733, 32, 17, 64, 10752 +; WAVE64-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2734, 32, 17, 64, 10496 +; WAVE64-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2735, 32, 17, 64, 10240 +; WAVE64-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2744, 32, 17, 64, 9984 +; WAVE64-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2745, 32, 17, 64, 9728 +; WAVE64-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2746, 32, 17, 64, 9472 +; WAVE64-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2747, 32, 17, 64, 9216 +; WAVE64-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2748, 32, 17, 64, 8960 +; 
WAVE64-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2749, 32, 17, 64, 8704 +; WAVE64-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2750, 32, 17, 64, 8448 +; WAVE64-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2751, 32, 17, 64, 8192 +; WAVE64-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2760, 32, 17, 64, 7936 +; WAVE64-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2761, 32, 17, 64, 7680 +; WAVE64-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2762, 32, 17, 64, 7424 +; WAVE64-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2763, 32, 17, 64, 7168 +; WAVE64-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2764, 32, 17, 64, 6912 +; WAVE64-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2765, 32, 17, 64, 6656 +; WAVE64-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2766, 32, 17, 64, 6400 +; WAVE64-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2767, 32, 17, 64, 6144 +; WAVE64-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2776, 32, 17, 64, 5888 +; WAVE64-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2777, 32, 17, 64, 5632 +; 
WAVE64-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2778, 32, 17, 64, 5376 +; WAVE64-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2779, 32, 17, 64, 5120 +; WAVE64-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2780, 32, 17, 64, 4864 +; WAVE64-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2781, 32, 17, 64, 4608 +; WAVE64-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2782, 32, 17, 64, 4352 +; WAVE64-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2783, 32, 17, 64, 4096 +; WAVE64-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2792, 32, 17, 64, 3840 +; WAVE64-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2793, 32, 17, 64, 3584 +; WAVE64-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2794, 32, 17, 64, 3328 +; WAVE64-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2795, 32, 17, 64, 3072 +; WAVE64-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2796, 32, 17, 64, 2816 +; WAVE64-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2797, 32, 17, 64, 2560 +; WAVE64-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2798, 32, 17, 64, 2304 +; 
WAVE64-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2799, 32, 17, 64, 2048 +; WAVE64-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2808, 32, 17, 64, 1792 +; WAVE64-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2809, 32, 17, 64, 1536 +; WAVE64-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2810, 32, 17, 64, 1280 +; WAVE64-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2811, 32, 17, 64, 1024 +; WAVE64-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2812, 32, 17, 64, 768 +; WAVE64-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2813, 32, 17, 64, 512 +; WAVE64-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2814, 32, 17, 64, 256 +; WAVE64-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_llvm_vector_offset 2815, 32, 17, 64, 0 +; WAVE64-NEXT: s_mov_b64 s[16:17], exec +; WAVE64-NEXT: s_mov_b64 exec, 3 +; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:464 +; WAVE64-NEXT: v_writelane_b32 v0, s30, 0 +; WAVE64-NEXT: v_writelane_b32 v0, s31, 1 +; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill +; WAVE64-NEXT: .cfi_offset 16, 28672 +; WAVE64-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:464 +; WAVE64-NEXT: s_waitcnt vmcnt(0) +; WAVE64-NEXT: s_mov_b64 exec, s[16:17] +; WAVE64-NEXT: s_getpc_b64 s[16:17] +; WAVE64-NEXT: s_add_u32 s16, s16, caller_needs_to_spill_pc_to_memory@rel32@lo+4 +; WAVE64-NEXT: s_addc_u32 
s17, s17, caller_needs_to_spill_pc_to_memory@rel32@hi+12 +; WAVE64-NEXT: s_swappc_b64 s[30:31], s[16:17] +; WAVE64-NEXT: s_mov_b64 s[4:5], exec +; WAVE64-NEXT: s_mov_b64 exec, 3 +; WAVE64-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:464 +; WAVE64-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload +; WAVE64-NEXT: s_waitcnt vmcnt(0) +; WAVE64-NEXT: v_readlane_b32 s30, v0, 0 +; WAVE64-NEXT: v_readlane_b32 s31, v0, 1 +; WAVE64-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:464 +; WAVE64-NEXT: s_waitcnt vmcnt(0) +; WAVE64-NEXT: s_mov_b64 exec, s[4:5] +; WAVE64-NEXT: buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 
4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte 
Folded Reload +; WAVE64-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 
4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 ; 
4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded 
Reload +; WAVE64-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload +; WAVE64-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload +; WAVE64-NEXT: s_mov_b32 s32, s33 +; WAVE64-NEXT: .cfi_def_cfa_register 64 +; WAVE64-NEXT: s_mov_b32 s33, s18 +; WAVE64-NEXT: s_waitcnt vmcnt(0) +; WAVE64-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: need_to_spill_pc_to_mem: +; WAVE32: .Lfunc_begin7: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: .cfi_undefined 1536 +; WAVE32-NEXT: .cfi_undefined 1537 +; WAVE32-NEXT: .cfi_undefined 1538 +; WAVE32-NEXT: .cfi_undefined 1539 +; WAVE32-NEXT: .cfi_undefined 1540 +; WAVE32-NEXT: .cfi_undefined 1541 +; WAVE32-NEXT: .cfi_undefined 1542 +; WAVE32-NEXT: .cfi_undefined 1543 +; WAVE32-NEXT: .cfi_undefined 1544 +; WAVE32-NEXT: .cfi_undefined 1545 +; WAVE32-NEXT: .cfi_undefined 1546 +; WAVE32-NEXT: .cfi_undefined 1547 +; WAVE32-NEXT: .cfi_undefined 1548 +; WAVE32-NEXT: .cfi_undefined 1549 +; WAVE32-NEXT: .cfi_undefined 1550 +; WAVE32-NEXT: .cfi_undefined 1551 +; WAVE32-NEXT: .cfi_undefined 1552 +; WAVE32-NEXT: .cfi_undefined 1553 +; WAVE32-NEXT: .cfi_undefined 1554 +; WAVE32-NEXT: .cfi_undefined 1555 +; WAVE32-NEXT: .cfi_undefined 1556 +; WAVE32-NEXT: .cfi_undefined 1557 +; WAVE32-NEXT: .cfi_undefined 1558 +; WAVE32-NEXT: 
.cfi_undefined 1559 +; WAVE32-NEXT: .cfi_undefined 1560 +; WAVE32-NEXT: .cfi_undefined 1561 +; WAVE32-NEXT: .cfi_undefined 1562 +; WAVE32-NEXT: .cfi_undefined 1563 +; WAVE32-NEXT: .cfi_undefined 1564 +; WAVE32-NEXT: .cfi_undefined 1565 +; WAVE32-NEXT: .cfi_undefined 1566 +; WAVE32-NEXT: .cfi_undefined 1567 +; WAVE32-NEXT: .cfi_undefined 1568 +; WAVE32-NEXT: .cfi_undefined 1569 +; WAVE32-NEXT: .cfi_undefined 1570 +; WAVE32-NEXT: .cfi_undefined 1571 +; WAVE32-NEXT: .cfi_undefined 1572 +; WAVE32-NEXT: .cfi_undefined 1573 +; WAVE32-NEXT: .cfi_undefined 1574 +; WAVE32-NEXT: .cfi_undefined 1575 +; WAVE32-NEXT: .cfi_undefined 1584 +; WAVE32-NEXT: .cfi_undefined 1585 +; WAVE32-NEXT: .cfi_undefined 1586 +; WAVE32-NEXT: .cfi_undefined 1587 +; WAVE32-NEXT: .cfi_undefined 1588 +; WAVE32-NEXT: .cfi_undefined 1589 +; WAVE32-NEXT: .cfi_undefined 1590 +; WAVE32-NEXT: .cfi_undefined 1591 +; WAVE32-NEXT: .cfi_undefined 1600 +; WAVE32-NEXT: .cfi_undefined 1601 +; WAVE32-NEXT: .cfi_undefined 1602 +; WAVE32-NEXT: .cfi_undefined 1603 +; WAVE32-NEXT: .cfi_undefined 1604 +; WAVE32-NEXT: .cfi_undefined 1605 +; WAVE32-NEXT: .cfi_undefined 1606 +; WAVE32-NEXT: .cfi_undefined 1607 +; WAVE32-NEXT: .cfi_undefined 1616 +; WAVE32-NEXT: .cfi_undefined 1617 +; WAVE32-NEXT: .cfi_undefined 1618 +; WAVE32-NEXT: .cfi_undefined 1619 +; WAVE32-NEXT: .cfi_undefined 1620 +; WAVE32-NEXT: .cfi_undefined 1621 +; WAVE32-NEXT: .cfi_undefined 1622 +; WAVE32-NEXT: .cfi_undefined 1623 +; WAVE32-NEXT: .cfi_undefined 1632 +; WAVE32-NEXT: .cfi_undefined 1633 +; WAVE32-NEXT: .cfi_undefined 1634 +; WAVE32-NEXT: .cfi_undefined 1635 +; WAVE32-NEXT: .cfi_undefined 1636 +; WAVE32-NEXT: .cfi_undefined 1637 +; WAVE32-NEXT: .cfi_undefined 1638 +; WAVE32-NEXT: .cfi_undefined 1639 +; WAVE32-NEXT: .cfi_undefined 1648 +; WAVE32-NEXT: .cfi_undefined 1649 +; WAVE32-NEXT: .cfi_undefined 1650 +; WAVE32-NEXT: .cfi_undefined 1651 +; WAVE32-NEXT: .cfi_undefined 1652 +; WAVE32-NEXT: .cfi_undefined 1653 +; WAVE32-NEXT: .cfi_undefined 1654 
+; WAVE32-NEXT: .cfi_undefined 1655 +; WAVE32-NEXT: .cfi_undefined 1664 +; WAVE32-NEXT: .cfi_undefined 1665 +; WAVE32-NEXT: .cfi_undefined 1666 +; WAVE32-NEXT: .cfi_undefined 1667 +; WAVE32-NEXT: .cfi_undefined 1668 +; WAVE32-NEXT: .cfi_undefined 1669 +; WAVE32-NEXT: .cfi_undefined 1670 +; WAVE32-NEXT: .cfi_undefined 1671 +; WAVE32-NEXT: .cfi_undefined 1680 +; WAVE32-NEXT: .cfi_undefined 1681 +; WAVE32-NEXT: .cfi_undefined 1682 +; WAVE32-NEXT: .cfi_undefined 1683 +; WAVE32-NEXT: .cfi_undefined 1684 +; WAVE32-NEXT: .cfi_undefined 1685 +; WAVE32-NEXT: .cfi_undefined 1686 +; WAVE32-NEXT: .cfi_undefined 1687 +; WAVE32-NEXT: .cfi_undefined 1696 +; WAVE32-NEXT: .cfi_undefined 1697 +; WAVE32-NEXT: .cfi_undefined 1698 +; WAVE32-NEXT: .cfi_undefined 1699 +; WAVE32-NEXT: .cfi_undefined 1700 +; WAVE32-NEXT: .cfi_undefined 1701 +; WAVE32-NEXT: .cfi_undefined 1702 +; WAVE32-NEXT: .cfi_undefined 1703 +; WAVE32-NEXT: .cfi_undefined 1712 +; WAVE32-NEXT: .cfi_undefined 1713 +; WAVE32-NEXT: .cfi_undefined 1714 +; WAVE32-NEXT: .cfi_undefined 1715 +; WAVE32-NEXT: .cfi_undefined 1716 +; WAVE32-NEXT: .cfi_undefined 1717 +; WAVE32-NEXT: .cfi_undefined 1718 +; WAVE32-NEXT: .cfi_undefined 1719 +; WAVE32-NEXT: .cfi_undefined 1728 +; WAVE32-NEXT: .cfi_undefined 1729 +; WAVE32-NEXT: .cfi_undefined 1730 +; WAVE32-NEXT: .cfi_undefined 1731 +; WAVE32-NEXT: .cfi_undefined 1732 +; WAVE32-NEXT: .cfi_undefined 1733 +; WAVE32-NEXT: .cfi_undefined 1734 +; WAVE32-NEXT: .cfi_undefined 1735 +; WAVE32-NEXT: .cfi_undefined 1744 +; WAVE32-NEXT: .cfi_undefined 1745 +; WAVE32-NEXT: .cfi_undefined 1746 +; WAVE32-NEXT: .cfi_undefined 1747 +; WAVE32-NEXT: .cfi_undefined 1748 +; WAVE32-NEXT: .cfi_undefined 1749 +; WAVE32-NEXT: .cfi_undefined 1750 +; WAVE32-NEXT: .cfi_undefined 1751 +; WAVE32-NEXT: .cfi_undefined 1760 +; WAVE32-NEXT: .cfi_undefined 1761 +; WAVE32-NEXT: .cfi_undefined 1762 +; WAVE32-NEXT: .cfi_undefined 1763 +; WAVE32-NEXT: .cfi_undefined 1764 +; WAVE32-NEXT: .cfi_undefined 1765 +; WAVE32-NEXT: 
.cfi_undefined 1766 +; WAVE32-NEXT: .cfi_undefined 1767 +; WAVE32-NEXT: .cfi_undefined 1776 +; WAVE32-NEXT: .cfi_undefined 1777 +; WAVE32-NEXT: .cfi_undefined 1778 +; WAVE32-NEXT: .cfi_undefined 1779 +; WAVE32-NEXT: .cfi_undefined 1780 +; WAVE32-NEXT: .cfi_undefined 1781 +; WAVE32-NEXT: .cfi_undefined 1782 +; WAVE32-NEXT: .cfi_undefined 1783 +; WAVE32-NEXT: .cfi_undefined 48 +; WAVE32-NEXT: .cfi_undefined 49 +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_mov_b32 s18, s33 +; WAVE32-NEXT: .cfi_register 65, 50 +; WAVE32-NEXT: v_mov_b32_e32 v0, exec_lo +; WAVE32-NEXT: s_mov_b32 s33, s32 +; WAVE32-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_offset 1, 14592 +; WAVE32-NEXT: .cfi_def_cfa_register 65 +; WAVE32-NEXT: s_addk_i32 s32, 0x3a00 +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1576, 32, 1, 32, 14208 +; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1577, 32, 1, 32, 14080 +; WAVE32-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1578, 32, 1, 32, 13952 +; WAVE32-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1579, 32, 1, 32, 13824 +; WAVE32-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1580, 32, 1, 32, 13696 +; WAVE32-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1581, 32, 1, 32, 13568 +; WAVE32-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1582, 32, 1, 32, 13440 +; WAVE32-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; WAVE32-NEXT: 
.cfi_llvm_vector_offset 1583, 32, 1, 32, 13312 +; WAVE32-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1592, 32, 1, 32, 13184 +; WAVE32-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1593, 32, 1, 32, 13056 +; WAVE32-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1594, 32, 1, 32, 12928 +; WAVE32-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1595, 32, 1, 32, 12800 +; WAVE32-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1596, 32, 1, 32, 12672 +; WAVE32-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1597, 32, 1, 32, 12544 +; WAVE32-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1598, 32, 1, 32, 12416 +; WAVE32-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1599, 32, 1, 32, 12288 +; WAVE32-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1608, 32, 1, 32, 12160 +; WAVE32-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1609, 32, 1, 32, 12032 +; WAVE32-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1610, 32, 1, 32, 11904 +; WAVE32-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1611, 32, 1, 32, 11776 +; WAVE32-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; WAVE32-NEXT: 
.cfi_llvm_vector_offset 1612, 32, 1, 32, 11648 +; WAVE32-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1613, 32, 1, 32, 11520 +; WAVE32-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1614, 32, 1, 32, 11392 +; WAVE32-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1615, 32, 1, 32, 11264 +; WAVE32-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1624, 32, 1, 32, 11136 +; WAVE32-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1625, 32, 1, 32, 11008 +; WAVE32-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1626, 32, 1, 32, 10880 +; WAVE32-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1627, 32, 1, 32, 10752 +; WAVE32-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1628, 32, 1, 32, 10624 +; WAVE32-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1629, 32, 1, 32, 10496 +; WAVE32-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1630, 32, 1, 32, 10368 +; WAVE32-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1631, 32, 1, 32, 10240 +; WAVE32-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1640, 32, 1, 32, 10112 +; WAVE32-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; WAVE32-NEXT: 
.cfi_llvm_vector_offset 1641, 32, 1, 32, 9984 +; WAVE32-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1642, 32, 1, 32, 9856 +; WAVE32-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1643, 32, 1, 32, 9728 +; WAVE32-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1644, 32, 1, 32, 9600 +; WAVE32-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1645, 32, 1, 32, 9472 +; WAVE32-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1646, 32, 1, 32, 9344 +; WAVE32-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1647, 32, 1, 32, 9216 +; WAVE32-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1656, 32, 1, 32, 9088 +; WAVE32-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1657, 32, 1, 32, 8960 +; WAVE32-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1658, 32, 1, 32, 8832 +; WAVE32-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1659, 32, 1, 32, 8704 +; WAVE32-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1660, 32, 1, 32, 8576 +; WAVE32-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1661, 32, 1, 32, 8448 +; WAVE32-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; WAVE32-NEXT: 
.cfi_llvm_vector_offset 1662, 32, 1, 32, 8320 +; WAVE32-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1663, 32, 1, 32, 8192 +; WAVE32-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1672, 32, 1, 32, 8064 +; WAVE32-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1673, 32, 1, 32, 7936 +; WAVE32-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1674, 32, 1, 32, 7808 +; WAVE32-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1675, 32, 1, 32, 7680 +; WAVE32-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1676, 32, 1, 32, 7552 +; WAVE32-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1677, 32, 1, 32, 7424 +; WAVE32-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1678, 32, 1, 32, 7296 +; WAVE32-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1679, 32, 1, 32, 7168 +; WAVE32-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1688, 32, 1, 32, 7040 +; WAVE32-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1689, 32, 1, 32, 6912 +; WAVE32-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1690, 32, 1, 32, 6784 +; WAVE32-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; WAVE32-NEXT: 
.cfi_llvm_vector_offset 1691, 32, 1, 32, 6656 +; WAVE32-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1692, 32, 1, 32, 6528 +; WAVE32-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1693, 32, 1, 32, 6400 +; WAVE32-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1694, 32, 1, 32, 6272 +; WAVE32-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1695, 32, 1, 32, 6144 +; WAVE32-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1704, 32, 1, 32, 6016 +; WAVE32-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1705, 32, 1, 32, 5888 +; WAVE32-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1706, 32, 1, 32, 5760 +; WAVE32-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1707, 32, 1, 32, 5632 +; WAVE32-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1708, 32, 1, 32, 5504 +; WAVE32-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1709, 32, 1, 32, 5376 +; WAVE32-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1710, 32, 1, 32, 5248 +; WAVE32-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1711, 32, 1, 32, 5120 +; WAVE32-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; WAVE32-NEXT: 
.cfi_llvm_vector_offset 1720, 32, 1, 32, 4992 +; WAVE32-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1721, 32, 1, 32, 4864 +; WAVE32-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1722, 32, 1, 32, 4736 +; WAVE32-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1723, 32, 1, 32, 4608 +; WAVE32-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1724, 32, 1, 32, 4480 +; WAVE32-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1725, 32, 1, 32, 4352 +; WAVE32-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1726, 32, 1, 32, 4224 +; WAVE32-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1727, 32, 1, 32, 4096 +; WAVE32-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1736, 32, 1, 32, 3968 +; WAVE32-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1737, 32, 1, 32, 3840 +; WAVE32-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1738, 32, 1, 32, 3712 +; WAVE32-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1739, 32, 1, 32, 3584 +; WAVE32-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1740, 32, 1, 32, 3456 +; WAVE32-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; WAVE32-NEXT: 
.cfi_llvm_vector_offset 1741, 32, 1, 32, 3328 +; WAVE32-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1742, 32, 1, 32, 3200 +; WAVE32-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1743, 32, 1, 32, 3072 +; WAVE32-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1752, 32, 1, 32, 2944 +; WAVE32-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1753, 32, 1, 32, 2816 +; WAVE32-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1754, 32, 1, 32, 2688 +; WAVE32-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1755, 32, 1, 32, 2560 +; WAVE32-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1756, 32, 1, 32, 2432 +; WAVE32-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1757, 32, 1, 32, 2304 +; WAVE32-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1758, 32, 1, 32, 2176 +; WAVE32-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1759, 32, 1, 32, 2048 +; WAVE32-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1768, 32, 1, 32, 1920 +; WAVE32-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1769, 32, 1, 32, 1792 +; WAVE32-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; WAVE32-NEXT: 
.cfi_llvm_vector_offset 1770, 32, 1, 32, 1664 +; WAVE32-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1771, 32, 1, 32, 1536 +; WAVE32-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1772, 32, 1, 32, 1408 +; WAVE32-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1773, 32, 1, 32, 1280 +; WAVE32-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1774, 32, 1, 32, 1152 +; WAVE32-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1775, 32, 1, 32, 1024 +; WAVE32-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1784, 32, 1, 32, 896 +; WAVE32-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1785, 32, 1, 32, 768 +; WAVE32-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1786, 32, 1, 32, 640 +; WAVE32-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1787, 32, 1, 32, 512 +; WAVE32-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1788, 32, 1, 32, 384 +; WAVE32-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1789, 32, 1, 32, 256 +; WAVE32-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1790, 32, 1, 32, 128 +; WAVE32-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1791, 32, 1, 32, 0 
+; WAVE32-NEXT: s_mov_b32 s16, exec_lo +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, 3 +; WAVE32-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:460 +; WAVE32-NEXT: v_writelane_b32 v0, s30, 0 +; WAVE32-NEXT: v_writelane_b32 v0, s31, 1 +; WAVE32-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_offset 16, 14336 +; WAVE32-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:460 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s16 +; WAVE32-NEXT: s_getpc_b64 s[16:17] +; WAVE32-NEXT: s_add_u32 s16, s16, caller_needs_to_spill_pc_to_memory@rel32@lo+4 +; WAVE32-NEXT: s_addc_u32 s17, s17, caller_needs_to_spill_pc_to_memory@rel32@hi+12 +; WAVE32-NEXT: s_swappc_b64 s[30:31], s[16:17] +; WAVE32-NEXT: s_mov_b32 s4, exec_lo +; WAVE32-NEXT: s_mov_b32 exec_lo, 3 +; WAVE32-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:460 +; WAVE32-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: v_readlane_b32 s30, v0, 0 +; WAVE32-NEXT: v_readlane_b32 s31, v0, 1 +; WAVE32-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:460 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s4 +; WAVE32-NEXT: s_clause 0x3e +; WAVE32-NEXT: buffer_load_dword v255, off, s[0:3], s33 +; WAVE32-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 +; WAVE32-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 +; WAVE32-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 +; WAVE32-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 +; WAVE32-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 +; WAVE32-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 +; WAVE32-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 +; WAVE32-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 +; WAVE32-NEXT: 
buffer_load_dword v238, off, s[0:3], s33 offset:36 +; WAVE32-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 +; WAVE32-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 +; WAVE32-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 +; WAVE32-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 +; WAVE32-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 +; WAVE32-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 +; WAVE32-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 +; WAVE32-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 +; WAVE32-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 +; WAVE32-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 +; WAVE32-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 +; WAVE32-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 +; WAVE32-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 +; WAVE32-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 +; WAVE32-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 +; WAVE32-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 +; WAVE32-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 +; WAVE32-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 +; WAVE32-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 +; WAVE32-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 +; WAVE32-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 +; WAVE32-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 +; WAVE32-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 +; WAVE32-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 +; WAVE32-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 +; WAVE32-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 +; WAVE32-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 +; WAVE32-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 +; WAVE32-NEXT: buffer_load_dword v185, off, s[0:3], s33 
offset:152 +; WAVE32-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 +; WAVE32-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 +; WAVE32-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 +; WAVE32-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 +; WAVE32-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 +; WAVE32-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 +; WAVE32-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 +; WAVE32-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 +; WAVE32-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 +; WAVE32-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 +; WAVE32-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 +; WAVE32-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 +; WAVE32-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 +; WAVE32-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 +; WAVE32-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 +; WAVE32-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 +; WAVE32-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 +; WAVE32-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 +; WAVE32-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 +; WAVE32-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 +; WAVE32-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 +; WAVE32-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 +; WAVE32-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 +; WAVE32-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 +; WAVE32-NEXT: s_clause 0x30 +; WAVE32-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 +; WAVE32-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 +; WAVE32-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 +; WAVE32-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 +; WAVE32-NEXT: buffer_load_dword v124, off, s[0:3], 
s33 offset:268 +; WAVE32-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 +; WAVE32-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 +; WAVE32-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 +; WAVE32-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 +; WAVE32-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 +; WAVE32-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 +; WAVE32-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 +; WAVE32-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 +; WAVE32-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 +; WAVE32-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 +; WAVE32-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 +; WAVE32-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 +; WAVE32-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 +; WAVE32-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 +; WAVE32-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 +; WAVE32-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 +; WAVE32-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:336 +; WAVE32-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 +; WAVE32-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 +; WAVE32-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 +; WAVE32-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 +; WAVE32-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 +; WAVE32-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 +; WAVE32-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 +; WAVE32-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 +; WAVE32-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 +; WAVE32-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 +; WAVE32-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 +; WAVE32-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 +; WAVE32-NEXT: 
buffer_load_dword v62, off, s[0:3], s33 offset:388 +; WAVE32-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:392 +; WAVE32-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 +; WAVE32-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 +; WAVE32-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 +; WAVE32-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 +; WAVE32-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 +; WAVE32-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 +; WAVE32-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:420 +; WAVE32-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:424 +; WAVE32-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 +; WAVE32-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 +; WAVE32-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 +; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 +; WAVE32-NEXT: s_mov_b32 s32, s33 +; WAVE32-NEXT: .cfi_def_cfa_register 64 +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 s33, s18 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] + call void @caller_needs_to_spill_pc_to_memory() + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "amdgpu-waves-per-eu"="10,10" } +attributes #2 = { nounwind "frame-pointer"="all" "amdgpu-waves-per-eu"="12,12" } +attributes #3 = { nounwind norecurse } + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "filename", directory: "directory") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3}