Skip to content

Commit 3308dfe

Browse files
committed
Properly handle undef statements in ScheduleDAGMI::moveInstruction
If a subregister define is moved before a define of a different subregister (but same base register) and the other define marks the remainder of the register as undefined, the moved define will be overwritten by the undef. To prevent this, move the `undef` to the moved instruction. Fixes #130884.
1 parent 5d38944 commit 3308dfe

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed

llvm/lib/CodeGen/MachineScheduler.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3979,6 +3979,32 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
39793979
continue;
39803980
LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
39813981
DAG->dumpNode(*Dep.getSUnit()));
3982+
3983+
// Check to make sure that there are no subreg definitions of the given
3984+
// register between its new and old location that are marked as undef. If
3985+
// so, mark the current instruction as undef instead.
3986+
SmallVector<MachineOperand *, 1> SubregDefs;
3987+
for (MachineOperand &MO : Copy->operands()) {
3988+
if (MO.isReg() && MO.isDef() && MO.getSubReg() != 0) {
3989+
SubregDefs.push_back(&MO);
3990+
}
3991+
}
3992+
if (SubregDefs.size()) {
3993+
for (auto CurrInst = InsertPos; CurrInst != Copy; ++CurrInst) {
3994+
for (MachineOperand &MO : CurrInst->operands()) {
3995+
if (MO.isReg() && MO.isDef() && MO.isUndef() && MO.getSubReg() != 0) {
3996+
for (auto *MISubregDef : SubregDefs) {
3997+
if (MISubregDef->getReg() == MO.getReg()) {
3998+
assert(!MISubregDef->isUndef() &&
3999+
"Register defined as undef twice.");
4000+
MO.setIsUndef(false);
4001+
MISubregDef->setIsUndef(true);
4002+
}
4003+
}
4004+
}
4005+
}
4006+
}
4007+
}
39824008
DAG->moveInstruction(Copy, InsertPos);
39834009
}
39844010
}

llvm/test/CodeGen/AMDGPU/shufflevector-physreg-copy.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -734,10 +734,10 @@ define i32 @shufflevector_v4i32_3210_physreg_even_vgpr_quad_copy_other_use_elt(p
734734
; GFX900-NEXT: ;;#ASMSTART
735735
; GFX900-NEXT: ; def v4, v5, v6, v7
736736
; GFX900-NEXT: ;;#ASMEND
737-
; GFX900-NEXT: v_mov_b32_e32 v9, v5
738-
; GFX900-NEXT: v_mov_b32_e32 v8, v6
739-
; GFX900-NEXT: v_mov_b32_e32 v10, v4
740-
; GFX900-NEXT: global_store_dwordx4 v0, v[7:10], s[16:17]
737+
; GFX900-NEXT: v_mov_b32_e32 v3, v5
738+
; GFX900-NEXT: v_mov_b32_e32 v2, v6
739+
; GFX900-NEXT: v_mov_b32_e32 v1, v7
740+
; GFX900-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17]
741741
; GFX900-NEXT: v_mov_b32_e32 v0, v6
742742
; GFX900-NEXT: s_waitcnt vmcnt(0)
743743
; GFX900-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)