Skip to content

Commit e20e4fd

Browse files
committed
- Avoid const_cast
1 parent e0a2ea9 commit e20e4fd

File tree

1 file changed

+24
-16
lines changed

1 file changed

+24
-16
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3405,11 +3405,15 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
34053405
}
34063406
};
34073407

3408+
const unsigned ConstantMaskBits = AMDGPU::DepCtr::encodeFieldSaSdst(
3409+
AMDGPU::DepCtr::encodeFieldVaSdst(AMDGPU::DepCtr::encodeFieldVaVcc(0), 0),
3410+
0);
34083411
auto UpdateStateFn = [&](StateType &State, const MachineInstr &I) {
34093412
switch (I.getOpcode()) {
34103413
case AMDGPU::S_WAITCNT_DEPCTR:
3411-
// Record waits within region of instructions free of SGPR reads.
3412-
if (!HasSGPRRead && I.getParent() == MI->getParent())
3414+
// Record mergeable waits within region of instructions free of SGPR reads.
3415+
if (!HasSGPRRead && I.getParent() == MI->getParent() && !I.isBundled() &&
3416+
(I.getOperand(0).getImm() & ConstantMaskBits) == ConstantMaskBits)
34133417
WaitInstrs.push_back(&I);
34143418
break;
34153419
default:
@@ -3459,21 +3463,22 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
34593463
: AMDGPU::DepCtr::encodeFieldSaSdst(0);
34603464

34613465
// Try to merge previous waits into this one for regions with no SGPR reads.
3462-
if (WaitInstrs.size()) {
3463-
const unsigned ConstantBits = AMDGPU::DepCtr::encodeFieldSaSdst(
3464-
AMDGPU::DepCtr::encodeFieldVaSdst(AMDGPU::DepCtr::encodeFieldVaVcc(0),
3465-
0),
3466-
0);
3467-
3468-
for (const MachineInstr *Instr : WaitInstrs) {
3469-
// Don't touch bundled waits.
3470-
if (Instr->isBundled())
3466+
if (!WaitInstrs.empty()) {
3467+
// Note: WaitInstrs contains const pointers, so walk backward from MI to
3468+
// obtain a mutable pointer to each instruction to be merged.
3469+
// This is expected to be a very short walk within the same block.
3470+
SmallVector<MachineInstr *> ToErase;
3471+
unsigned Found = 0;
3472+
for (MachineBasicBlock::reverse_iterator It = MI->getReverseIterator(),
3473+
End = MI->getParent()->rend();
3474+
Found < WaitInstrs.size() && It != End; ++It) {
3475+
MachineInstr *WaitMI = &*It;
3476+
// Find next wait instruction.
3477+
if (std::as_const(WaitMI) != WaitInstrs[Found])
34713478
continue;
3472-
MachineInstr *WaitMI = const_cast<MachineInstr *>(Instr);
3479+
Found++;
34733480
unsigned WaitMask = WaitMI->getOperand(0).getImm();
3474-
// Only work with counters related to this hazard.
3475-
if ((WaitMask & ConstantBits) != ConstantBits)
3476-
continue;
3481+
assert((WaitMask & ConstantMaskBits) == ConstantMaskBits);
34773482
DepCtr = AMDGPU::DepCtr::encodeFieldSaSdst(
34783483
DepCtr, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(WaitMask),
34793484
AMDGPU::DepCtr::decodeFieldSaSdst(DepCtr)));
@@ -3483,8 +3488,11 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
34833488
DepCtr = AMDGPU::DepCtr::encodeFieldVaVcc(
34843489
DepCtr, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(WaitMask),
34853490
AMDGPU::DepCtr::decodeFieldVaVcc(DepCtr)));
3486-
WaitMI->eraseFromParent();
3491+
ToErase.push_back(WaitMI);
34873492
}
3493+
assert(Found == WaitInstrs.size());
3494+
for (MachineInstr *WaitMI : ToErase)
3495+
WaitMI->eraseFromParent();
34883496
}
34893497

34903498
// Add s_waitcnt_depctr after SGPR write.

0 commit comments

Comments
 (0)