Skip to content

Commit e591af5

Browse files
committed
Properly handle undef statements in LiveIntervals::handleMove
If a subregister define is moved before a define of a different subregister (but the same base register), and the other define marks the remainder of the register as undefined, the moved define will be overwritten by the undef. To prevent this, move the `undef` flag to the moved instruction. Fixes #130884.
1 parent c13003a commit e591af5

File tree

4 files changed

+44
-11
lines changed

4 files changed

+44
-11
lines changed

llvm/lib/CodeGen/LiveIntervals.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1570,6 +1570,29 @@ void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) {
15701570

15711571
HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
15721572
HME.updateAllRanges(&MI);
1573+
1574+
// Check to make sure that there are no subreg definitions marked undef
1575+
// after the moved operation. If so, mark the current instruction as undef
1576+
// instead.
1577+
for (MachineOperand &MO : MI.operands()) {
1578+
if (MO.isReg() && MO.isDef() && MO.getSubReg() != 0 && !MO.isUndef()) {
1579+
SlotIndex Index = Indexes->getInstructionIndex(MI);
1580+
LiveInterval &LI = getInterval(MO.getReg());
1581+
LiveRange::iterator IndexSeg = LI.find(Index);
1582+
if (std::next(IndexSeg) == LI.end())
1583+
continue;
1584+
if (MachineInstr *NextMI =
1585+
getInstructionFromIndex(std::next(IndexSeg)->valno->def)) {
1586+
for (MachineOperand &NextMO : NextMI->operands()) {
1587+
if (NextMO.isReg() && NextMO.isDef() && NextMO.getSubReg() != 0 &&
1588+
NextMO.isUndef() && NextMO.getReg() == MO.getReg()) {
1589+
MO.setIsUndef(true);
1590+
NextMO.setIsUndef(false);
1591+
}
1592+
}
1593+
}
1594+
}
1595+
}
15731596
}
15741597

15751598
void LiveIntervals::handleMoveIntoNewBundle(MachineInstr &BundleStart,

llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,7 @@ body: |
729729
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
730730
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
731731
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
732-
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
732+
; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
733733
; GFX908-NEXT: {{ $}}
734734
; GFX908-NEXT: bb.1:
735735
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -2795,7 +2795,7 @@ body: |
27952795
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
27962796
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
27972797
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
2798-
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
2798+
; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
27992799
; GFX908-NEXT: {{ $}}
28002800
; GFX908-NEXT: bb.1:
28012801
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -2960,7 +2960,7 @@ body: |
29602960
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
29612961
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
29622962
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
2963-
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
2963+
; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
29642964
; GFX908-NEXT: {{ $}}
29652965
; GFX908-NEXT: bb.1:
29662966
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -3138,7 +3138,7 @@ body: |
31383138
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
31393139
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
31403140
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
3141-
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
3141+
; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
31423142
; GFX908-NEXT: {{ $}}
31433143
; GFX908-NEXT: bb.1:
31443144
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -3328,7 +3328,7 @@ body: |
33283328
; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
33293329
; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
33303330
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
3331-
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
3331+
; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
33323332
; GFX908-NEXT: {{ $}}
33333333
; GFX908-NEXT: bb.1:
33343334
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -6353,7 +6353,7 @@ body: |
63536353
; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
63546354
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
63556355
; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
6356-
; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
6356+
; GFX908-NEXT: dead [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
63576357
; GFX908-NEXT: S_BRANCH %bb.1
63586358
; GFX908-NEXT: {{ $}}
63596359
; GFX908-NEXT: bb.1:
@@ -6553,7 +6553,7 @@ body: |
65536553
; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
65546554
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
65556555
; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
6556-
; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
6556+
; GFX908-NEXT: dead [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
65576557
; GFX908-NEXT: S_BRANCH %bb.1
65586558
; GFX908-NEXT: {{ $}}
65596559
; GFX908-NEXT: bb.1:

llvm/test/CodeGen/AMDGPU/machine-scheduler-undef-reorder.mir

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
12
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX900 %s
23

34
---
@@ -8,6 +9,15 @@ machineFunctionInfo:
89
body: |
910
bb.0:
1011
liveins: $vgpr2
12+
; GFX900-LABEL: name: test_undef_reorder
13+
; GFX900: liveins: $vgpr2
14+
; GFX900-NEXT: {{ $}}
15+
; GFX900-NEXT: $vgpr5 = COPY $vgpr2
16+
; GFX900-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr5
17+
; GFX900-NEXT: $vgpr4 = COPY $vgpr2
18+
; GFX900-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr4
19+
; GFX900-NEXT: dead [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
20+
; GFX900-NEXT: S_ENDPGM 0
1121
$vgpr5 = COPY $vgpr2
1222
$vgpr4 = COPY $vgpr2
1323
undef %18.sub1:vreg_64 = COPY killed $vgpr4

llvm/test/CodeGen/AMDGPU/shufflevector-physreg-copy.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -734,10 +734,10 @@ define i32 @shufflevector_v4i32_3210_physreg_even_vgpr_quad_copy_other_use_elt(p
734734
; GFX900-NEXT: ;;#ASMSTART
735735
; GFX900-NEXT: ; def v4, v5, v6, v7
736736
; GFX900-NEXT: ;;#ASMEND
737-
; GFX900-NEXT: v_mov_b32_e32 v9, v5
738-
; GFX900-NEXT: v_mov_b32_e32 v8, v6
739-
; GFX900-NEXT: v_mov_b32_e32 v10, v4
740-
; GFX900-NEXT: global_store_dwordx4 v0, v[7:10], s[16:17]
737+
; GFX900-NEXT: v_mov_b32_e32 v3, v5
738+
; GFX900-NEXT: v_mov_b32_e32 v2, v6
739+
; GFX900-NEXT: v_mov_b32_e32 v1, v7
740+
; GFX900-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17]
741741
; GFX900-NEXT: v_mov_b32_e32 v0, v6
742742
; GFX900-NEXT: s_waitcnt vmcnt(0)
743743
; GFX900-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)