Skip to content

Commit b5f2001

Browse files
authored
[CodeGen] Register-coalescer remat fix subreg liveness (#165662)
This is a bug fix for rematerialization in the register coalescer: the liveness of the subregister lane masks was updated incorrectly, which caused a crash in the scheduler.
1 parent cdf52a1 commit b5f2001

File tree

3 files changed

+149
-10
lines changed

3 files changed

+149
-10
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,6 +1600,22 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
16001600
SlotIndex DefIndex =
16011601
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
16021602
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
1603+
1604+
// Refine the subranges that are now defined by the remat.
1605+
// This will split existing subranges if necessary.
1606+
DstInt.refineSubRanges(
1607+
Alloc, DstMask,
1608+
[&DefIndex, &Alloc](LiveInterval::SubRange &SR) {
1609+
// We know that this lane is defined by this instruction,
1610+
// but at this point it might not be live because it was not defined
1611+
// by the original instruction. This happens when the
1612+
// rematerialization widens the defined register. Assign that lane a
1613+
// dead def so that the interferences are properly modeled.
1614+
if (!SR.liveAt(DefIndex))
1615+
SR.createDeadDef(DefIndex, Alloc);
1616+
},
1617+
*LIS->getSlotIndexes(), *TRI);
1618+
16031619
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
16041620
if ((SR.LaneMask & DstMask).none()) {
16051621
LLVM_DEBUG(dbgs()
@@ -1617,14 +1633,6 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
16171633
// updateRegDefUses. The original subrange def may have only undefed
16181634
// some lanes.
16191635
UpdatedSubRanges = true;
1620-
} else {
1621-
// We know that this lane is defined by this instruction,
1622-
// but at this point it might not be live because it was not defined
1623-
// by the original instruction. This happens when the
1624-
// rematerialization widens the defined register. Assign that lane a
1625-
// dead def so that the interferences are properly modeled.
1626-
if (!SR.liveAt(DefIndex))
1627-
SR.createDeadDef(DefIndex, Alloc);
16281636
}
16291637
}
16301638
if (UpdatedSubRanges)
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
3+
4+
# This test checks the fix for the failure "Bad machine code: Defining instruction does not modify register", which was caused by a corrupt lane mask.
5+
6+
---
7+
name: reg_coalescer_subreg_liveness
8+
tracksRegLiveness: true
9+
liveins:
10+
body: |
11+
; CHECK-LABEL: name: reg_coalescer_subreg_liveness
12+
; CHECK: bb.0:
13+
; CHECK-NEXT: successors: %bb.1(0x80000000)
14+
; CHECK-NEXT: liveins: $sgpr4_sgpr5
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
17+
; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
18+
; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
19+
; CHECK-NEXT: undef [[S_MOV_B32_1:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
20+
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
21+
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
22+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
23+
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 0
24+
; CHECK-NEXT: {{ $}}
25+
; CHECK-NEXT: bb.1:
26+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
27+
; CHECK-NEXT: {{ $}}
28+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub0
29+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub0
30+
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
31+
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
32+
; CHECK-NEXT: $vcc_lo = COPY $exec_lo
33+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
34+
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 1
35+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc_lo
36+
; CHECK-NEXT: S_BRANCH %bb.2
37+
; CHECK-NEXT: {{ $}}
38+
; CHECK-NEXT: bb.2:
39+
; CHECK-NEXT: S_ENDPGM 0
40+
bb.0:
41+
successors: %bb.1(0x80000000)
42+
liveins: $sgpr4_sgpr5
43+
44+
%0:sgpr_64 = COPY killed $sgpr4_sgpr5
45+
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
46+
%2:sreg_32 = S_MOV_B32 1
47+
undef %3.sub0:sgpr_128 = COPY %2
48+
%4:sreg_32 = S_MOV_B32 0
49+
undef %5.sub0:sgpr_256 = COPY %4
50+
TENSOR_LOAD_TO_LDS_D2 %3, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
51+
%6:sgpr_128 = COPY killed %3
52+
%6.sub1:sgpr_128 = COPY killed %1
53+
%7:sreg_32 = COPY $exec_lo
54+
%8:sreg_32 = COPY %2
55+
%9:sreg_32 = COPY %4
56+
57+
bb.1:
58+
successors: %bb.1(0x40000000), %bb.2(0x40000000)
59+
60+
%10:sreg_32 = COPY killed %8
61+
undef %11.sub0:sgpr_128 = COPY %2
62+
%11.sub1:sgpr_128 = COPY killed %10
63+
%11.sub2:sgpr_128 = COPY %2
64+
%11.sub3:sgpr_128 = COPY %2
65+
TENSOR_LOAD_TO_LDS_D2 killed %11, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
66+
%12:sreg_32 = COPY killed %9
67+
%13:sgpr_128 = COPY %6
68+
%13.sub2:sgpr_128 = COPY killed %12
69+
TENSOR_LOAD_TO_LDS_D2 killed %13, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
70+
$vcc_lo = COPY %7
71+
%8:sreg_32 = COPY %4
72+
%9:sreg_32 = COPY %2
73+
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc
74+
S_BRANCH %bb.2
75+
76+
bb.2:
77+
S_ENDPGM 0
78+
...
79+
---
80+
name: reg_coalescer_subreg_liveness_2
81+
tracksRegLiveness: true
82+
liveins:
83+
body: |
84+
; CHECK-LABEL: name: reg_coalescer_subreg_liveness_2
85+
; CHECK: bb.0:
86+
; CHECK-NEXT: successors: %bb.1(0x80000000)
87+
; CHECK-NEXT: liveins: $sgpr4_sgpr5
88+
; CHECK-NEXT: {{ $}}
89+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
90+
; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
91+
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
92+
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
93+
; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
94+
; CHECK-NEXT: {{ $}}
95+
; CHECK-NEXT: bb.1:
96+
; CHECK-NEXT: successors: %bb.2(0x80000000)
97+
; CHECK-NEXT: {{ $}}
98+
; CHECK-NEXT: S_NOP 0, implicit [[S_LOAD_DWORD_IMM]], implicit [[S_MOV_B32_]]
99+
; CHECK-NEXT: S_BRANCH %bb.2
100+
; CHECK-NEXT: {{ $}}
101+
; CHECK-NEXT: bb.2:
102+
; CHECK-NEXT: S_ENDPGM 0
103+
bb.0:
104+
successors: %bb.1(0x80000000)
105+
liveins: $sgpr4_sgpr5
106+
107+
%0:sgpr_64 = COPY killed $sgpr4_sgpr5
108+
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
109+
%2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
110+
%3:sreg_32 = S_MOV_B32 1
111+
undef %4.sub0:sgpr_128 = COPY %3
112+
%5:sgpr_128 = COPY %4
113+
%5.sub1:sgpr_128 = COPY killed %2
114+
%6:sgpr_128 = COPY %5
115+
%6.sub2:sgpr_128 = COPY killed %1
116+
%7:sreg_32 = S_MOV_B32 0
117+
undef %8.sub0:sgpr_256 = COPY %7
118+
%9:sreg_32 = COPY %3
119+
120+
bb.1:
121+
successors: %bb.2(0x80000000)
122+
123+
%10:sreg_32 = COPY killed %9
124+
undef %11.sub0:sgpr_128 = COPY %3
125+
%11.sub1:sgpr_128 = COPY killed %10
126+
S_NOP 0, implicit %5, implicit %8
127+
S_BRANCH %bb.2
128+
129+
bb.2:
130+
S_ENDPGM 0
131+
...

llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ define void @main(i16 %in) {
2222
; CHECK-NEXT: locghile %r3, 1
2323
; CHECK-NEXT: o %r0, 0(%r1)
2424
; CHECK-NEXT: larl %r1, g_222
25-
; CHECK-NEXT: lghi %r5, 0
2625
; CHECK-NEXT: dsgfr %r2, %r0
26+
; CHECK-NEXT: lghi %r3, 0
2727
; CHECK-NEXT: stgrl %r2, g_39
28-
; CHECK-NEXT: stc %r5, 19(%r1)
28+
; CHECK-NEXT: stc %r3, 19(%r1)
2929
; CHECK-NEXT: br %r14
3030
%tmp = load i32, ptr @g_151, align 4
3131
%tmp3 = or i32 %tmp, 1

0 commit comments

Comments
 (0)