@@ -1202,6 +1202,7 @@ static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI,
   unsigned Op = MI.getOpcode();
   switch (Op) {
   case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
+  case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE:
   case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
     // FIXME: This assumes the mask is statically known and not computed at
     // runtime. However, some ABIs may want to compute the mask dynamically and
@@ -1986,11 +1987,17 @@ void SIRegisterInfo::buildSpillLoadStore(
     MIB.addImm(0); // swz
     MIB.addMemOperand(NewMMO);
 
-    if (IsStore && NeedsCFI)
-      TFL->buildCFIForVGPRToVMEMSpill(MBB, MI, DebugLoc(), SubReg,
-                                      (Offset + RegOffset) *
-                                              ST.getWavefrontSize() +
-                                          AdditionalCFIOffset);
+    if (IsStore && NeedsCFI) {
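+      // A block store writes the whole 32-register VGPR block as a single
+      // element, so emit CFI per covered register rather than per sub-register.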
+      if (TII->isBlockLoadStore(LoadStoreOp)) {
+        assert(RegOffset == 0 &&
+               "expected whole register block to be treated as single element");
+        buildCFIForBlockCSRStore(MBB, MI, ValueReg, Offset);
+      } else {
+        TFL->buildCFIForVGPRToVMEMSpill(
+            MBB, MI, DebugLoc(), SubReg,
+            (Offset + RegOffset) * ST.getWavefrontSize() + AdditionalCFIOffset);
+      }
+    }
 
     if (!IsAGPR && NeedSuperRegDef)
       MIB.addReg(ValueReg, RegState::ImplicitDefine);
@@ -2061,6 +2068,31 @@ void SIRegisterInfo::addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB,
       MIB.addUse(BaseVGPR + RegOffset, RegState::Implicit);
 }
 
+void SIRegisterInfo::buildCFIForBlockCSRStore(MachineBasicBlock &MBB,
+                                              MachineBasicBlock::iterator MBBI,
+                                              Register BlockReg,
+                                              int64_t Offset) const {
+  const MachineFunction *MF = MBB.getParent();
+  const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
+  uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(BlockReg);
+  Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0);
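+  // Bit N of the mask corresponds to BaseVGPR + N; only registers whose bit
+  // is set were actually written to memory by the block store.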
+  for (unsigned RegOffset = 0; RegOffset < 32; ++RegOffset) {
+    Register VGPR = BaseVGPR + RegOffset;
+    if (Mask & (1 << RegOffset)) {
+      assert(isCalleeSavedPhysReg(VGPR, *MF));
+      ST.getFrameLowering()->buildCFIForVGPRToVMEMSpill(
+          MBB, MBBI, DebugLoc(), VGPR,
+          (Offset + RegOffset) * ST.getWavefrontSize());
+    } else if (isCalleeSavedPhysReg(VGPR, *MF)) {
+      // FIXME: This is a workaround for the fact that FrameLowering's
+      // emitPrologueEntryCFI considers the block load to clobber all registers
+      // in the block.
+      ST.getFrameLowering()->buildCFIForSameValue(MBB, MBBI, DebugLoc(),
+                                                  BaseVGPR + RegOffset);
+    }
+  }
+}
+
 void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
                                              int Offset, bool IsLoad,
                                              bool IsKill) const {
@@ -2538,6 +2570,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   }
 
   // VGPR register spill
+  case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE:
   case AMDGPU::SI_SPILL_V1024_CFI_SAVE:
   case AMDGPU::SI_SPILL_V512_CFI_SAVE:
   case AMDGPU::SI_SPILL_V256_CFI_SAVE:
@@ -2570,13 +2603,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   case AMDGPU::SI_SPILL_AV32_CFI_SAVE:
     NeedsCFI = true;
     LLVM_FALLTHROUGH;
-  case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: {
-    // Put mask into M0.
-    BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
-            AMDGPU::M0)
-        .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
-    LLVM_FALLTHROUGH;
-  }
+  case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
   case AMDGPU::SI_SPILL_V1024_SAVE:
   case AMDGPU::SI_SPILL_V512_SAVE:
   case AMDGPU::SI_SPILL_V384_SAVE:
@@ -2622,6 +2649,16 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   case AMDGPU::SI_SPILL_AV32_SAVE:
   case AMDGPU::SI_SPILL_WWM_V32_SAVE:
   case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
+    assert(MI->getOpcode() != AMDGPU::SI_BLOCK_SPILL_V1024_SAVE &&
+           "block spill does not currently support spilling non-CSR registers");
+
+    if (MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE)
+      // Put mask into M0.
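+      // (The scratch block store consumes the register mask implicitly
+      // through M0.)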
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
+              AMDGPU::M0)
+          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
+
     const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                        AMDGPU::OpName::vdata);
     if (VData->isUndef()) {
@@ -2637,7 +2674,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!");
       Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16;
     } else {
-      Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_SAVE
+      Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE
                 ? AMDGPU::SCRATCH_STORE_BLOCK_SADDR
                 : ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                          : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
@@ -2652,7 +2689,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     buildSpillLoadStore(
         *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
         TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
-        *MI->memoperands_begin(), RS);
+        *MI->memoperands_begin(), RS, nullptr, NeedsCFI);
     MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(*MI, TII));
     if (IsWWMRegSpill)
       TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());