diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index f76d1266f495c..246ef7ad481ab 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2268,7 +2268,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); + const DebugLoc &DL = MI->getDebugLoc(); assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?"); @@ -2496,6 +2496,25 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, Register MaterializedReg = FrameReg; Register ScavengedVGPR; + int64_t Offset = FrameInfo.getObjectOffset(Index); + // For the non-immediate case, we could fall through to the default + // handling, but we do an in-place update of the result register here to + // avoid scavenging another register. + if (OtherOp->isImm()) { + int64_t TotalOffset = OtherOp->getImm() + Offset; + + if (!ST.hasVOP3Literal() && SIInstrInfo::isVOP3(*MI) && + !AMDGPU::isInlinableIntLiteral(TotalOffset)) { + // If we can't support a VOP3 literal in the VALU instruction, we + // can't specially fold into the add. + // TODO: Handle VOP3->VOP2 shrink to support the fold. + break; + } + + OtherOp->setImm(TotalOffset); + Offset = 0; + } + if (FrameReg && !ST.enableFlatScratch()) { // We should just do an in-place update of the result register. However, // the value there may also be used by the add, in which case we need a @@ -2516,15 +2535,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MaterializedReg = ScavengedVGPR; } - int64_t Offset = FrameInfo.getObjectOffset(Index); - // For the non-immediate case, we could fall through to the default - // handling, but we do an in-place update of the result register here to - // avoid scavenging another register. 
- if (OtherOp->isImm()) { - OtherOp->setImm(OtherOp->getImm() + Offset); - Offset = 0; - } - if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) { if (ST.enableFlatScratch() && !TII->isOperandLegal(*MI, Src1Idx, OtherOp)) { @@ -2761,411 +2771,408 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, return true; } default: { - // Other access to frame index - const DebugLoc &DL = MI->getDebugLoc(); + break; + } + } - int64_t Offset = FrameInfo.getObjectOffset(Index); - if (ST.enableFlatScratch()) { - if (TII->isFLATScratch(*MI)) { - assert((int16_t)FIOperandNum == - AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::saddr)); + int64_t Offset = FrameInfo.getObjectOffset(Index); + if (ST.enableFlatScratch()) { + if (TII->isFLATScratch(*MI)) { + assert( + (int16_t)FIOperandNum == + AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::saddr)); - // The offset is always swizzled, just replace it - if (FrameReg) - FIOp->ChangeToRegister(FrameReg, false); + // The offset is always swizzled, just replace it + if (FrameReg) + FIOp->ChangeToRegister(FrameReg, false); - MachineOperand *OffsetOp = + MachineOperand *OffsetOp = TII->getNamedOperand(*MI, AMDGPU::OpName::offset); - int64_t NewOffset = Offset + OffsetOp->getImm(); - if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS, - SIInstrFlags::FlatScratch)) { - OffsetOp->setImm(NewOffset); - if (FrameReg) - return false; - Offset = 0; - } + int64_t NewOffset = Offset + OffsetOp->getImm(); + if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS, + SIInstrFlags::FlatScratch)) { + OffsetOp->setImm(NewOffset); + if (FrameReg) + return false; + Offset = 0; + } - if (!Offset) { - unsigned Opc = MI->getOpcode(); - int NewOpc = -1; - if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) { - NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc); - } else if (ST.hasFlatScratchSTMode()) { - // On GFX10 we have ST mode to use no registers for an address. - // Otherwise we need to materialize 0 into an SGPR. - NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc); - } + if (!Offset) { + unsigned Opc = MI->getOpcode(); + int NewOpc = -1; + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) { + NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc); + } else if (ST.hasFlatScratchSTMode()) { + // On GFX10 we have ST mode to use no registers for an address. + // Otherwise we need to materialize 0 into an SGPR. + NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc); + } - if (NewOpc != -1) { - // removeOperand doesn't fixup tied operand indexes as it goes, so - // it asserts. Untie vdst_in for now and retie them afterwards. - int VDstIn = AMDGPU::getNamedOperandIdx(Opc, - AMDGPU::OpName::vdst_in); - bool TiedVDst = VDstIn != -1 && - MI->getOperand(VDstIn).isReg() && - MI->getOperand(VDstIn).isTied(); - if (TiedVDst) - MI->untieRegOperand(VDstIn); - - MI->removeOperand( - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr)); - - if (TiedVDst) { - int NewVDst = - AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst); - int NewVDstIn = - AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in); - assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!"); - MI->tieOperands(NewVDst, NewVDstIn); - } - MI->setDesc(TII->get(NewOpc)); - return false; + if (NewOpc != -1) { + // removeOperand doesn't fixup tied operand indexes as it goes, so + // it asserts. Untie vdst_in for now and retie them afterwards. 
+ int VDstIn = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); + bool TiedVDst = VDstIn != -1 && MI->getOperand(VDstIn).isReg() && + MI->getOperand(VDstIn).isTied(); + if (TiedVDst) + MI->untieRegOperand(VDstIn); + + MI->removeOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr)); + + if (TiedVDst) { + int NewVDst = + AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst); + int NewVDstIn = + AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in); + assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!"); + MI->tieOperands(NewVDst, NewVDstIn); } + MI->setDesc(TII->get(NewOpc)); + return false; } } + } - if (!FrameReg) { - FIOp->ChangeToImmediate(Offset); - if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) - return false; - } + if (!FrameReg) { + FIOp->ChangeToImmediate(Offset); + if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) + return false; + } - // We need to use register here. Check if we can use an SGPR or need - // a VGPR. - FIOp->ChangeToRegister(AMDGPU::M0, false); - bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp); + // We need to use register here. Check if we can use an SGPR or need + // a VGPR. + FIOp->ChangeToRegister(AMDGPU::M0, false); + bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp); - if (!Offset && FrameReg && UseSGPR) { - FIOp->setReg(FrameReg); - return false; - } + if (!Offset && FrameReg && UseSGPR) { + FIOp->setReg(FrameReg); + return false; + } - const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass - : &AMDGPU::VGPR_32RegClass; + const TargetRegisterClass *RC = + UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass; - Register TmpReg = - RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR); - FIOp->setReg(TmpReg); - FIOp->setIsKill(); + Register TmpReg = + RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR); + FIOp->setReg(TmpReg); + FIOp->setIsKill(); - if ((!FrameReg || !Offset) && TmpReg) { - unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; - auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg); - if (FrameReg) - MIB.addReg(FrameReg); - else - MIB.addImm(Offset); + if ((!FrameReg || !Offset) && TmpReg) { + unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; + auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg); + if (FrameReg) + MIB.addReg(FrameReg); + else + MIB.addImm(Offset); - return false; - } + return false; + } - bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) && - !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr); + bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) && + !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr); - Register TmpSReg = - UseSGPR ? TmpReg - : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, - MI, false, 0, !UseSGPR); + Register TmpSReg = + UseSGPR ? TmpReg + : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, + MI, false, 0, !UseSGPR); - // TODO: for flat scratch another attempt can be made with a VGPR index - // if no SGPRs can be scavenged. - if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR)) - report_fatal_error("Cannot scavenge register in FI elimination!"); + // TODO: for flat scratch another attempt can be made with a VGPR index + // if no SGPRs can be scavenged. + if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR)) + report_fatal_error("Cannot scavenge register in FI elimination!"); - if (!TmpSReg) { - // Use frame register and restore it after. 
- TmpSReg = FrameReg; - FIOp->setReg(FrameReg); - FIOp->setIsKill(false); - } + if (!TmpSReg) { + // Use frame register and restore it after. + TmpSReg = FrameReg; + FIOp->setReg(FrameReg); + FIOp->setIsKill(false); + } - if (NeedSaveSCC) { - assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!"); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg) - .addReg(FrameReg) - .addImm(Offset); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32)) - .addReg(TmpSReg) - .addImm(0); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg) + if (NeedSaveSCC) { + assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!"); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg) + .addReg(FrameReg) + .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32)) + .addReg(TmpSReg) + .addImm(0); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg) + .addImm(0) + .addReg(TmpSReg); + } else { + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg) + .addReg(FrameReg) + .addImm(Offset); + } + + if (!UseSGPR) + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) + .addReg(TmpSReg, RegState::Kill); + + if (TmpSReg == FrameReg) { + // Undo frame register modification. + if (NeedSaveSCC && + !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) { + MachineBasicBlock::iterator I = + BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32), + TmpSReg) + .addReg(FrameReg) + .addImm(-Offset); + I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32)) + .addReg(TmpSReg) + .addImm(0); + BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32), + TmpSReg) .addImm(0) .addReg(TmpSReg); } else { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg) + BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32), + FrameReg) .addReg(FrameReg) - .addImm(Offset); + .addImm(-Offset); } + } - if (!UseSGPR) - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) - .addReg(TmpSReg, RegState::Kill); - - if (TmpSReg == FrameReg) { - // Undo frame register modification. - if (NeedSaveSCC && - !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) { - MachineBasicBlock::iterator I = - BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32), - TmpSReg) - .addReg(FrameReg) - .addImm(-Offset); - I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32)) - .addReg(TmpSReg) - .addImm(0); - BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32), - TmpSReg) - .addImm(0) - .addReg(TmpSReg); - } else { - BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32), - FrameReg) - .addReg(FrameReg) - .addImm(-Offset); - } - } + return false; + } - return false; - } + bool IsMUBUF = TII->isMUBUF(*MI); + + if (!IsMUBUF && !MFI->isBottomOfStack()) { + // Convert to a swizzled stack address by scaling by the wave size. + // In an entry function/kernel the offset is already swizzled. + bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum)); + bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) && + !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr); + const TargetRegisterClass *RC = IsSALU && !LiveSCC + ? &AMDGPU::SReg_32RegClass + : &AMDGPU::VGPR_32RegClass; + bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 || + MI->getOpcode() == AMDGPU::V_MOV_B32_e64 || + MI->getOpcode() == AMDGPU::S_MOV_B32; + Register ResultReg = + IsCopy ? 
MI->getOperand(0).getReg() + : RS->scavengeRegisterBackwards(*RC, MI, false, 0); - bool IsMUBUF = TII->isMUBUF(*MI); - - if (!IsMUBUF && !MFI->isBottomOfStack()) { - // Convert to a swizzled stack address by scaling by the wave size. - // In an entry function/kernel the offset is already swizzled. - bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum)); - bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) && - !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr); - const TargetRegisterClass *RC = IsSALU && !LiveSCC - ? &AMDGPU::SReg_32RegClass - : &AMDGPU::VGPR_32RegClass; - bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 || - MI->getOpcode() == AMDGPU::V_MOV_B32_e64 || - MI->getOpcode() == AMDGPU::S_MOV_B32; - Register ResultReg = - IsCopy ? MI->getOperand(0).getReg() - : RS->scavengeRegisterBackwards(*RC, MI, false, 0); - - int64_t Offset = FrameInfo.getObjectOffset(Index); - if (Offset == 0) { - unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 - : AMDGPU::V_LSHRREV_B32_e64; - Register TmpResultReg = ResultReg; - if (IsSALU && LiveSCC) { - TmpResultReg = RS->scavengeRegisterBackwards( - AMDGPU::VGPR_32RegClass, MI, false, 0); - } + int64_t Offset = FrameInfo.getObjectOffset(Index); + if (Offset == 0) { + unsigned OpCode = + IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64; + Register TmpResultReg = ResultReg; + if (IsSALU && LiveSCC) { + TmpResultReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, + MI, false, 0); + } - auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg); - if (OpCode == AMDGPU::V_LSHRREV_B32_e64) - // For V_LSHRREV, the operands are reversed (the shift count goes - // first). - Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg); - else - Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2()); - if (IsSALU && !LiveSCC) - Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead. - if (IsSALU && LiveSCC) { - Register NewDest = - IsCopy ? ResultReg - : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass, - Shift, false, 0); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - NewDest) - .addReg(TmpResultReg); - ResultReg = NewDest; - } - } else { - MachineInstrBuilder MIB; - if (!IsSALU) { - if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) != - nullptr) { - // Reuse ResultReg in intermediate step. - Register ScaledReg = ResultReg; - - BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), - ScaledReg) + auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg); + if (OpCode == AMDGPU::V_LSHRREV_B32_e64) + // For V_LSHRREV, the operands are reversed (the shift count goes + // first). + Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg); + else + Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2()); + if (IsSALU && !LiveSCC) + Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead. + if (IsSALU && LiveSCC) { + Register NewDest = + IsCopy ? ResultReg + : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass, + Shift, false, 0); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), NewDest) + .addReg(TmpResultReg); + ResultReg = NewDest; + } + } else { + MachineInstrBuilder MIB; + if (!IsSALU) { + if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) != + nullptr) { + // Reuse ResultReg in intermediate step. 
+ Register ScaledReg = ResultReg; + + BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), + ScaledReg) .addImm(ST.getWavefrontSizeLog2()) .addReg(FrameReg); - const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32; + const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32; - // TODO: Fold if use instruction is another add of a constant. - if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { - // FIXME: This can fail - MIB.addImm(Offset); - MIB.addReg(ScaledReg, RegState::Kill); - if (!IsVOP2) - MIB.addImm(0); // clamp bit - } else { - assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 && - "Need to reuse carry out register"); - - // Use scavenged unused carry out as offset register. - Register ConstOffsetReg; - if (!isWave32) - ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0); - else - ConstOffsetReg = MIB.getReg(1); - - BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) - .addImm(Offset); - MIB.addReg(ConstOffsetReg, RegState::Kill); - MIB.addReg(ScaledReg, RegState::Kill); + // TODO: Fold if use instruction is another add of a constant. + if (IsVOP2 || + AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { + // FIXME: This can fail + MIB.addImm(Offset); + MIB.addReg(ScaledReg, RegState::Kill); + if (!IsVOP2) MIB.addImm(0); // clamp bit - } + } else { + assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 && + "Need to reuse carry out register"); + + // Use scavenged unused carry out as offset register. + Register ConstOffsetReg; + if (!isWave32) + ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0); + else + ConstOffsetReg = MIB.getReg(1); + + BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), + ConstOffsetReg) + .addImm(Offset); + MIB.addReg(ConstOffsetReg, RegState::Kill); + MIB.addReg(ScaledReg, RegState::Kill); + MIB.addImm(0); // clamp bit } } - if (!MIB || IsSALU) { - // We have to produce a carry out, and there isn't a free SGPR pair - // for it. We can keep the whole computation on the SALU to avoid - // clobbering an additional register at the cost of an extra mov. - - // We may have 1 free scratch SGPR even though a carry out is - // unavailable. Only one additional mov is needed. - Register TmpScaledReg = IsCopy && IsSALU - ? ResultReg - : RS->scavengeRegisterBackwards( - AMDGPU::SReg_32_XM0RegClass, MI, - false, 0, /*AllowSpill=*/false); - Register ScaledReg = - TmpScaledReg.isValid() ? TmpScaledReg : FrameReg; - Register TmpResultReg = ScaledReg; - - if (!LiveSCC) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg) - .addReg(FrameReg) - .addImm(ST.getWavefrontSizeLog2()); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg) - .addReg(TmpResultReg, RegState::Kill) - .addImm(Offset); + } + if (!MIB || IsSALU) { + // We have to produce a carry out, and there isn't a free SGPR pair + // for it. We can keep the whole computation on the SALU to avoid + // clobbering an additional register at the cost of an extra mov. + + // We may have 1 free scratch SGPR even though a carry out is + // unavailable. Only one additional mov is needed. + Register TmpScaledReg = IsCopy && IsSALU + ? ResultReg + : RS->scavengeRegisterBackwards( + AMDGPU::SReg_32_XM0RegClass, MI, + false, 0, /*AllowSpill=*/false); + Register ScaledReg = TmpScaledReg.isValid() ? 
TmpScaledReg : FrameReg; + Register TmpResultReg = ScaledReg; + + if (!LiveSCC) { + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), TmpResultReg) + .addReg(FrameReg) + .addImm(ST.getWavefrontSizeLog2()); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpResultReg) + .addReg(TmpResultReg, RegState::Kill) + .addImm(Offset); + } else { + TmpResultReg = RS->scavengeRegisterBackwards( + AMDGPU::VGPR_32RegClass, MI, false, 0, /*AllowSpill=*/true); + + MachineInstrBuilder Add; + if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) { + BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), + TmpResultReg) + .addImm(ST.getWavefrontSizeLog2()) + .addReg(FrameReg); + if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) { + BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32), ResultReg) + .addImm(Offset); + Add.addReg(ResultReg, RegState::Kill) + .addReg(TmpResultReg, RegState::Kill) + .addImm(0); + } else + Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill); } else { - TmpResultReg = RS->scavengeRegisterBackwards( - AMDGPU::VGPR_32RegClass, MI, false, 0, /*AllowSpill=*/true); - - MachineInstrBuilder Add; - if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) { - BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), + assert(Offset > 0 && isUInt<24>(2 * ST.getMaxWaveScratchSize()) && + "offset is unsafe for v_mad_u32_u24"); + + // We start with a frame pointer with a wave space value, and + // an offset in lane-space. We are materializing a lane space + // value. We can either do a right shift of the frame pointer + // to get to lane space, or a left shift of the offset to get + // to wavespace. We can right shift after the computation to + // get back to the desired per-lane value. We are using the + // mad_u32_u24 primarily as an add with no carry out clobber. + bool IsInlinableLiteral = + AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm()); + if (!IsInlinableLiteral) { + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpResultReg) - .addImm(ST.getWavefrontSizeLog2()) - .addReg(FrameReg); - if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) { - BuildMI(*MBB, *Add, DL, TII->get(AMDGPU::S_MOV_B32), - ResultReg) - .addImm(Offset); - Add.addReg(ResultReg, RegState::Kill) - .addReg(TmpResultReg, RegState::Kill) - .addImm(0); - } else - Add.addImm(Offset).addReg(TmpResultReg, RegState::Kill); - } else { - assert(Offset > 0 && - isUInt<24>(2 * ST.getMaxWaveScratchSize()) && - "offset is unsafe for v_mad_u32_u24"); - - // We start with a frame pointer with a wave space value, and - // an offset in lane-space. We are materializing a lane space - // value. We can either do a right shift of the frame pointer - // to get to lane space, or a left shift of the offset to get - // to wavespace. We can right shift after the computation to - // get back to the desired per-lane value. We are using the - // mad_u32_u24 primarily as an add with no carry out clobber. - bool IsInlinableLiteral = AMDGPU::isInlinableLiteral32( - Offset, ST.hasInv2PiInlineImm()); - if (!IsInlinableLiteral) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), - TmpResultReg) - .addImm(Offset); - } - - Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_U32_U24_e64), - TmpResultReg); - - if (!IsInlinableLiteral) { - Add.addReg(TmpResultReg, RegState::Kill); - } else { - // We fold the offset into mad itself if its inlinable. 
- Add.addImm(Offset); - } - Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), - TmpResultReg) - .addImm(ST.getWavefrontSizeLog2()) - .addReg(TmpResultReg); + .addImm(Offset); } - Register NewDest = IsCopy ? ResultReg - : RS->scavengeRegisterBackwards( - AMDGPU::SReg_32RegClass, *Add, - false, 0, /*AllowSpill=*/true); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - NewDest) + Add = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MAD_U32_U24_e64), + TmpResultReg); + + if (!IsInlinableLiteral) { + Add.addReg(TmpResultReg, RegState::Kill); + } else { + // We fold the offset into mad itself if its inlinable. + Add.addImm(Offset); + } + Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), + TmpResultReg) + .addImm(ST.getWavefrontSizeLog2()) .addReg(TmpResultReg); - ResultReg = NewDest; } - if (!IsSALU) - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg) - .addReg(TmpResultReg, RegState::Kill); - else - ResultReg = TmpResultReg; - // If there were truly no free SGPRs, we need to undo everything. - if (!TmpScaledReg.isValid()) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg) + + Register NewDest = IsCopy ? ResultReg + : RS->scavengeRegisterBackwards( + AMDGPU::SReg_32RegClass, *Add, + false, 0, /*AllowSpill=*/true); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + NewDest) + .addReg(TmpResultReg); + ResultReg = NewDest; + } + if (!IsSALU) + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg) + .addReg(TmpResultReg, RegState::Kill); + else + ResultReg = TmpResultReg; + // If there were truly no free SGPRs, we need to undo everything. + if (!TmpScaledReg.isValid()) { + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg) .addReg(ScaledReg, RegState::Kill) .addImm(-Offset); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg) + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg) .addReg(FrameReg) .addImm(ST.getWavefrontSizeLog2()); - } } } + } - // Don't introduce an extra copy if we're just materializing in a mov. - if (IsCopy) { - MI->eraseFromParent(); - return true; - } - FIOp->ChangeToRegister(ResultReg, false, false, true); - return false; + // Don't introduce an extra copy if we're just materializing in a mov. + if (IsCopy) { + MI->eraseFromParent(); + return true; } + FIOp->ChangeToRegister(ResultReg, false, false, true); + return false; + } - if (IsMUBUF) { - // Disable offen so we don't need a 0 vgpr base. - assert(static_cast(FIOperandNum) == - AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::vaddr)); + if (IsMUBUF) { + // Disable offen so we don't need a 0 vgpr base. 
+ assert( + static_cast<int>(FIOperandNum) == + AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr)); - auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset); - assert((SOffset.isImm() && SOffset.getImm() == 0)); + auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset); + assert((SOffset.isImm() && SOffset.getImm() == 0)); - if (FrameReg != AMDGPU::NoRegister) - SOffset.ChangeToRegister(FrameReg, false); + if (FrameReg != AMDGPU::NoRegister) + SOffset.ChangeToRegister(FrameReg, false); - int64_t Offset = FrameInfo.getObjectOffset(Index); - int64_t OldImm - = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(); - int64_t NewOffset = OldImm + Offset; + int64_t Offset = FrameInfo.getObjectOffset(Index); + int64_t OldImm = + TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(); + int64_t NewOffset = OldImm + Offset; - if (TII->isLegalMUBUFImmOffset(NewOffset) && - buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) { - MI->eraseFromParent(); - return true; - } + if (TII->isLegalMUBUFImmOffset(NewOffset) && + buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) { + MI->eraseFromParent(); + return true; } + } - // If the offset is simply too big, don't convert to a scratch wave offset - // relative index. + // If the offset is simply too big, don't convert to a scratch wave offset + // relative index. - FIOp->ChangeToImmediate(Offset); - if (!TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) { - Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, - MI, false, 0); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) + FIOp->ChangeToImmediate(Offset); + if (!TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) { + Register TmpReg = + RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) .addImm(Offset); - FIOp->ChangeToRegister(TmpReg, false, false, true); - } + FIOp->ChangeToRegister(TmpReg, false, false, true); } - } + return false; } diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir index fc6cd74bf052c..831e246426ba7 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir @@ -980,8 +980,8 @@ body: | ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec - ; GFX7-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec + ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec + ; GFX7-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec ; GFX7-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX8-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp @@ -989,8 +989,8 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec - ; GFX8-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec + ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 72, 
implicit $exec + ; GFX8-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec ; GFX8-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX900-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp @@ -998,8 +998,8 @@ body: | ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec - ; GFX900-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec + ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec + ; GFX900-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec ; GFX900-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX90A-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp @@ -1007,8 +1007,8 @@ body: | ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 84, implicit $exec - ; GFX90A-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec + ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec + ; GFX90A-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec ; GFX90A-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX10-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp @@ -1020,8 +1020,8 @@ body: | ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX940-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; GFX940: $vgpr1 = V_MOV_B32_e32 84, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr1, 0, 1, implicit $exec + ; GFX940: $sgpr4 = S_MOV_B32 72 + ; GFX940-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $sgpr4, 1, implicit $exec ; GFX940-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp @@ -2036,3 +2036,521 @@ body: | S_ENDPGM 0 ... 
+ +--- +name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc +tracksRegLiveness: true +frameInfo: + localFrameSize: 12576 +stack: + - { id: 0, size: 4, alignment: 8192, local-offset: 0 } + - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc + ; GFX7: liveins: $sgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288 + ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec + ; GFX7-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX7-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc + ; GFX8: liveins: $sgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288 + ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec + ; GFX8-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX8-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc + ; GFX900: liveins: $sgpr4 + ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec + ; GFX900-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX900-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc + ; GFX90A: liveins: $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, 
implicit-def dead $scc + ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec + ; GFX90A-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX90A-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc + ; GFX10: liveins: $sgpr4 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX10-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec + ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX10-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc + ; GFX940: liveins: $sgpr4 + ; GFX940-NEXT: {{ $}} + ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc + ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc + ; GFX940-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec + ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc + ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX940-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc + ; GFX11: liveins: $sgpr4 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc + ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec + ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec + ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc + ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX11-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc + ; GFX12: liveins: $sgpr4 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc + ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec 
+ ; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec + ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc + ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX12-NEXT: SI_RETURN implicit $vgpr0 + renamable $vgpr0, renamable dead $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec + SI_RETURN implicit $vgpr0 + +... + +--- +name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live +tracksRegLiveness: true +frameInfo: + localFrameSize: 12576 +stack: + - { id: 0, size: 4, alignment: 8192, local-offset: 0 } + - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live + ; GFX7: liveins: $sgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288 + ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec + ; GFX7-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 + ; + ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live + ; GFX8: liveins: $sgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288 + ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec + ; GFX8-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 + ; + ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live + ; GFX900: liveins: $sgpr4 + ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec + ; GFX900-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX900-NEXT: $sgpr32 = frame-destroy 
S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 + ; + ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live + ; GFX90A: liveins: $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec + ; GFX90A-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 + ; + ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live + ; GFX10: liveins: $sgpr4 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec + ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec + ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 + ; + ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live + ; GFX940: liveins: $sgpr4 + ; GFX940-NEXT: {{ $}} + ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc + ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc + ; GFX940-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec + ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc + ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 + ; + ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live + ; GFX11: liveins: $sgpr4 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc + ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec + ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit 
$exec + ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec + ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc + ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 + ; + ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live + ; GFX12: liveins: $sgpr4 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc + ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec + ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec + ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec + ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc + ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 + renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec + SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 + +... + +--- +name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc +tracksRegLiveness: true +frameInfo: + localFrameSize: 12576 +stack: + - { id: 0, size: 4, alignment: 8192, local-offset: 0 } + - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc + ; GFX7: liveins: $sgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288 + ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec + ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX7-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc + ; GFX8: liveins: $sgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288 + ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec + ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX8-NEXT: $sgpr32 = frame-destroy 
S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX8-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc + ; GFX900: liveins: $sgpr4 + ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec + ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX900-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc + ; GFX90A: liveins: $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec + ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX90A-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc + ; GFX10: liveins: $sgpr4 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec + ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX10-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc + ; GFX940: liveins: $sgpr4 + ; GFX940-NEXT: {{ $}} + ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc + ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc + ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec + ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc + ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; 
GFX940-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc + ; GFX11: liveins: $sgpr4 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc + ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec + ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec + ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc + ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX11-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc + ; GFX12: liveins: $sgpr4 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc + ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec + ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec + ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc + ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX12-NEXT: SI_RETURN implicit $vgpr0 + renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec + SI_RETURN implicit $vgpr0 + +... + +--- +name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live +tracksRegLiveness: true +frameInfo: + localFrameSize: 12576 +stack: + - { id: 0, size: 4, alignment: 8192, local-offset: 0 } + - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live + ; GFX7: liveins: $sgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX7-NEXT: $sgpr6 = S_MOV_B32 12288 + ; GFX7-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr6, killed $vgpr1, 0, implicit $exec + ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX7-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc + ; + ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live + ; GFX8: liveins: $sgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def 
dead $scc + ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX8-NEXT: $sgpr6 = S_MOV_B32 12288 + ; GFX8-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr6, killed $vgpr1, 0, implicit $exec + ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc + ; + ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live + ; GFX900: liveins: $sgpr4 + ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec + ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX900-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc + ; + ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live + ; GFX90A: liveins: $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec + ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec + ; GFX90A-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc + ; + ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live + ; GFX10: liveins: $sgpr4 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec + ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec + ; GFX10-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc + ; + ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live + ; GFX940: liveins: $sgpr4 + ; GFX940-NEXT: {{ $}} + ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, 
+    ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; GFX940-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX11: liveins: $sgpr4
+    ; GFX11-NEXT: {{  $}}
+    ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX11-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    ;
+    ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live
+    ; GFX12: liveins: $sgpr4
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
+    ; GFX12-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+    ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+    renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0, implicit $vcc
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
index 9c2fef05124d7..de198941b565e 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
@@ -1692,3 +1692,61 @@ body: |
     SI_RETURN implicit $vgpr0
 
 ...
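+
+# The object offset of %stack.1 (12288) plus the inline operand (64) is
+# 12352, which is not an inline immediate: subtargets without VOP3
+# literals cannot fold it into the V_ADD_U32_e64 and must add the offset
+# in a separate instruction, while VOP3-literal subtargets (see the
+# MUBUFW32 checks) fold the full 12352 directly.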
+---
+name: v_add_u32_e64_imm_fi_vop3_literal_error
+tracksRegLiveness: true
+frameInfo:
+  localFrameSize: 12576
+stack:
+  - { id: 0, size: 4, alignment: 8192, local-offset: 0 }
+  - { id: 1, size: 8480, alignment: 4096, local-offset: 4096 }
+machineFunctionInfo:
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg: '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body: |
+  bb.0:
+    ; MUBUF-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+    ; MUBUF: liveins: $sgpr4
+    ; MUBUF-NEXT: {{  $}}
+    ; MUBUF-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
+    ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
+    ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
+    ; MUBUF-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+    ; MUBUF-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec
+    ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $vgpr1, 0, implicit $exec
+    ; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
+    ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; MUBUFW32-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+    ; MUBUFW32: liveins: $sgpr4
+    ; MUBUFW32-NEXT: {{  $}}
+    ; MUBUFW32-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262112, implicit-def $scc
+    ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc
+    ; MUBUFW32-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1048576, implicit-def dead $scc
+    ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr33, implicit $exec
+    ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; MUBUFW32-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1048576, implicit-def dead $scc
+    ; MUBUFW32-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+    ;
+    ; FLATSCRW64-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error
+    ; FLATSCRW64: liveins: $sgpr4
+    ; FLATSCRW64-NEXT: {{  $}}
+    ; FLATSCRW64-NEXT: $sgpr4 = frame-setup COPY $sgpr33
+    ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
+    ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc
+    ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $sgpr5, 0, implicit $exec
+    ; FLATSCRW64-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
+    ; FLATSCRW64-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
+    ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+    renamable $vgpr0 = V_ADD_U32_e64 64, %stack.1, 0, implicit $exec
+    SI_RETURN implicit $vgpr0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index c11b7d67a8a21..b1ea275a97a39 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -327,4 +327,23 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}fi_vop3_literal_error:
+; CI: v_lshr_b32_e64 [[SCALED_FP:v[0-9]+]], s33, 6
+; CI: s_movk_i32 vcc_lo, 0x3000
+; CI-NEXT: v_add_i32_e32 [[SCALED_FP]], vcc, vcc_lo, [[SCALED_FP]]
+; CI-NEXT: v_add_i32_e32 v0, vcc, 64, [[SCALED_FP]]
+
+; GFX9-MUBUF: v_lshrrev_b32_e64 [[SCALED_FP:v[0-9]+]], 6, s33
+; GFX9-MUBUF-NEXT: v_add_u32_e32 [[SCALED_FP]], 0x3000, [[SCALED_FP]]
+; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 64, [[SCALED_FP]]
+define void @fi_vop3_literal_error() {
+entry:
+  %pin.low = alloca i32, align 8192, addrspace(5)
+  %local.area = alloca [1060 x i64], align 4096, addrspace(5)
+  store i32 0, ptr addrspace(5) %pin.low, align 4
+  %gep.small.offset = getelementptr i8, ptr addrspace(5) %local.area, i64 64
+  %load1 = load volatile i64, ptr addrspace(5) %gep.small.offset, align 4
+  ret void
+}
+
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 12afc26735422..29fbb0bb1c6c9 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -20,8 +20,9 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(ptr addrspace(1) %out) {
 ; MUBUF-LABEL: local_stack_offset_uses_sp:
 ; MUBUF: ; %bb.0: ; %entry
 ; MUBUF-NEXT: s_add_u32 s0, s0, s17
+; MUBUF-NEXT: v_mov_b32_e32 v1, 0x3000
 ; MUBUF-NEXT: s_addc_u32 s1, s1, 0
-; MUBUF-NEXT: v_mov_b32_e32 v0, 0x3040
+; MUBUF-NEXT: v_add_u32_e32 v0, 64, v1
 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0
 ; MUBUF-NEXT: v_mov_b32_e32 v2, 0x2000
 ; MUBUF-NEXT: s_mov_b32 s4, 0
@@ -110,7 +111,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) {
 ; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0
 ; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
 ; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s33
-; MUBUF-NEXT: v_add_u32_e32 v2, 0x3040, v3
+; MUBUF-NEXT: v_add_u32_e32 v3, 0x3000, v3
+; MUBUF-NEXT: v_add_u32_e32 v2, 64, v3
 ; MUBUF-NEXT: v_mov_b32_e32 v3, 0
 ; MUBUF-NEXT: v_mov_b32_e32 v4, 0x2000
 ; MUBUF-NEXT: s_mov_b32 s4, 0