Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions llvm/lib/CodeGen/RegisterCoalescer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1600,6 +1600,22 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
SlotIndex DefIndex =
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();

// Refine the subranges that are now defined by the remat.
// This will split existing subranges if necessary.
DstInt.refineSubRanges(
Alloc, DstMask,
[&DefIndex, &Alloc](LiveInterval::SubRange &SR) {
// We know that this lane is defined by this instruction,
// but at this point it might not be live because it was not defined
// by the original instruction. This happens when the
// rematerialization widens the defined register. Assign that lane a
// dead def so that the interferences are properly modeled.
if (!SR.liveAt(DefIndex))
SR.createDeadDef(DefIndex, Alloc);
},
*LIS->getSlotIndexes(), *TRI);

for (LiveInterval::SubRange &SR : DstInt.subranges()) {
if ((SR.LaneMask & DstMask).none()) {
LLVM_DEBUG(dbgs()
Expand All @@ -1617,14 +1633,6 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
// updateRegDefUses. The original subrange def may have only undefed
// some lanes.
UpdatedSubRanges = true;
} else {
// We know that this lane is defined by this instruction,
// but at this point it might not be live because it was not defined
// by the original instruction. This happens when the
// rematerialization widens the defined register. Assign that lane a
// dead def so that the interferences are properly modeled.
if (!SR.liveAt(DefIndex))
SR.createDeadDef(DefIndex, Alloc);
}
}
if (UpdatedSubRanges)
Expand Down
131 changes: 131 additions & 0 deletions llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s

# This test checks the fix for the verifier failure "Bad machine code: Defining instruction does not modify register", which was caused by a corrupt lane mask.

# Loop variant. In the input, %6 and %13 are whole-register sgpr_128 copies
# (of %3 and %6) that each get a single subregister overwritten afterwards
# (sub1 from the scalar load %1, sub2 from %12). The constants %2
# (S_MOV_B32 1) and %4 (S_MOV_B32 0) feed subregister copies both in bb.0 and
# in bb.1, and bb.1 lists itself as a successor, so %8 and %9 are redefined at
# the bottom of the loop body. The CHECK lines expect the scalar COPYs to be
# gone and S_MOV_B32 to define subregisters of the merged sgpr_128 registers
# directly (presumably via rematerialization of the constants - confirm
# against the coalescer debug output).
---
name: reg_coalescer_subreg_liveness
tracksRegLiveness: true
liveins:
body: |
; CHECK-LABEL: name: reg_coalescer_subreg_liveness
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
; CHECK-NEXT: undef [[S_MOV_B32_1:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub0
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub0
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; CHECK-NEXT: $vcc_lo = COPY $exec_lo
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 1
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc_lo
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
liveins: $sgpr4_sgpr5

%0:sgpr_64 = COPY killed $sgpr4_sgpr5
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
%2:sreg_32 = S_MOV_B32 1
undef %3.sub0:sgpr_128 = COPY %2
%4:sreg_32 = S_MOV_B32 0
undef %5.sub0:sgpr_256 = COPY %4
TENSOR_LOAD_TO_LDS_D2 %3, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
%6:sgpr_128 = COPY killed %3
%6.sub1:sgpr_128 = COPY killed %1
%7:sreg_32 = COPY $exec_lo
%8:sreg_32 = COPY %2
%9:sreg_32 = COPY %4

bb.1:
successors: %bb.1(0x40000000), %bb.2(0x40000000)

%10:sreg_32 = COPY killed %8
undef %11.sub0:sgpr_128 = COPY %2
%11.sub1:sgpr_128 = COPY killed %10
%11.sub2:sgpr_128 = COPY %2
%11.sub3:sgpr_128 = COPY %2
TENSOR_LOAD_TO_LDS_D2 killed %11, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
%12:sreg_32 = COPY killed %9
%13:sgpr_128 = COPY %6
%13.sub2:sgpr_128 = COPY killed %12
TENSOR_LOAD_TO_LDS_D2 killed %13, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
$vcc_lo = COPY %7
%8:sreg_32 = COPY %4
%9:sreg_32 = COPY %2
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc
S_BRANCH %bb.2

bb.2:
S_ENDPGM 0
...
# Straight-line variant (no loop back-edge). The input builds a chain of
# whole-register sgpr_128 copies, %4 -> %5 -> %6, where each copy is followed
# by a single-subregister overwrite from a scalar load (sub1 from %2, sub2
# from %1). %5 stays live into bb.1 through the S_NOP use, while %6 and the
# %11 tuple built in bb.1 have no uses. The CHECK lines expect one sgpr_128
# register whose sub2 and sub1 lanes are defined by the two loads and whose
# sub0 lane is defined directly by S_MOV_B32 1, and no instructions for %11.
---
name: reg_coalescer_subreg_liveness_2
tracksRegLiveness: true
liveins:
body: |
; CHECK-LABEL: name: reg_coalescer_subreg_liveness_2
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit [[S_LOAD_DWORD_IMM]], implicit [[S_MOV_B32_]]
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
liveins: $sgpr4_sgpr5

%0:sgpr_64 = COPY killed $sgpr4_sgpr5
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
%2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 24, 0 :: (dereferenceable invariant load (s32), align 8, addrspace 4)
%3:sreg_32 = S_MOV_B32 1
undef %4.sub0:sgpr_128 = COPY %3
%5:sgpr_128 = COPY %4
%5.sub1:sgpr_128 = COPY killed %2
%6:sgpr_128 = COPY %5
%6.sub2:sgpr_128 = COPY killed %1
%7:sreg_32 = S_MOV_B32 0
undef %8.sub0:sgpr_256 = COPY %7
%9:sreg_32 = COPY %3

bb.1:
successors: %bb.2(0x80000000)

%10:sreg_32 = COPY killed %9
undef %11.sub0:sgpr_128 = COPY %3
%11.sub1:sgpr_128 = COPY killed %10
S_NOP 0, implicit %5, implicit %8
S_BRANCH %bb.2

bb.2:
S_ENDPGM 0
...
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ define void @main(i16 %in) {
; CHECK-NEXT: locghile %r3, 1
; CHECK-NEXT: o %r0, 0(%r1)
; CHECK-NEXT: larl %r1, g_222
; CHECK-NEXT: lghi %r5, 0
; CHECK-NEXT: dsgfr %r2, %r0
; CHECK-NEXT: lghi %r3, 0
; CHECK-NEXT: stgrl %r2, g_39
; CHECK-NEXT: stc %r5, 19(%r1)
; CHECK-NEXT: stc %r3, 19(%r1)
; CHECK-NEXT: br %r14
%tmp = load i32, ptr @g_151, align 4
%tmp3 = or i32 %tmp, 1
Expand Down