Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/RegisterCoalescer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1625,6 +1625,7 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
// dead def so that the interferences are properly modeled.
if (!SR.liveAt(DefIndex))
SR.createDeadDef(DefIndex, Alloc);
SR.LaneMask = DstMask & SR.LaneMask;
}
}
if (UpdatedSubRanges)
Expand Down
78 changes: 78 additions & 0 deletions llvm/test/CodeGen/AMDGPU/reg-coalescer-subreg-liveness.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s

# This test checks the fix for the failure "Bad machine code: Defining instruction does not modify register", which was caused by a corrupt lane mask.

---
name: reg_coalescer_subreg_liveness
tracksRegLiveness: true
liveins:
body: |
; CHECK-LABEL: name: reg_coalescer_subreg_liveness
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sgpr_128 = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
; CHECK-NEXT: undef [[S_MOV_B32_1:%[0-9]+]].sub0:sgpr_256 = S_MOV_B32 0
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 1
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub0
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub0
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_MOV_B32_]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; CHECK-NEXT: TENSOR_LOAD_TO_LDS_D2 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; CHECK-NEXT: $vcc_lo = COPY $exec_lo
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 1
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc_lo
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
liveins: $sgpr4_sgpr5

%0:sgpr_64 = COPY killed $sgpr4_sgpr5
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
%2:sreg_32 = S_MOV_B32 1
undef %3.sub0:sgpr_128 = COPY %2
%4:sreg_32 = S_MOV_B32 0
undef %5.sub0:sgpr_256 = COPY %4
TENSOR_LOAD_TO_LDS_D2 %3, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
%6:sgpr_128 = COPY killed %3
%6.sub1:sgpr_128 = COPY killed %1
%7:sreg_32 = COPY $exec_lo
%8:sreg_32 = COPY %2
%9:sreg_32 = COPY %4

bb.1:
successors: %bb.1(0x40000000), %bb.2(0x40000000)

%10:sreg_32 = COPY killed %8
undef %11.sub0:sgpr_128 = COPY %2
%11.sub1:sgpr_128 = COPY killed %10
%11.sub2:sgpr_128 = COPY %2
%11.sub3:sgpr_128 = COPY %2
TENSOR_LOAD_TO_LDS_D2 killed %11, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
%12:sreg_32 = COPY killed %9
%13:sgpr_128 = COPY %6
%13.sub2:sgpr_128 = COPY killed %12
TENSOR_LOAD_TO_LDS_D2 killed %13, %5, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
$vcc_lo = COPY %7
%8:sreg_32 = COPY %4
%9:sreg_32 = COPY %2
S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc_lo, implicit $vcc_lo, implicit $vcc
S_BRANCH %bb.2

bb.2:
S_ENDPGM 0
...
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ define void @main(i16 %in) {
; CHECK-NEXT: locghile %r3, 1
; CHECK-NEXT: o %r0, 0(%r1)
; CHECK-NEXT: larl %r1, g_222
; CHECK-NEXT: lghi %r5, 0
; CHECK-NEXT: dsgfr %r2, %r0
; CHECK-NEXT: lghi %r3, 0
; CHECK-NEXT: stgrl %r2, g_39
; CHECK-NEXT: stc %r5, 19(%r1)
; CHECK-NEXT: stc %r3, 19(%r1)
; CHECK-NEXT: br %r14
%tmp = load i32, ptr @g_151, align 4
%tmp3 = or i32 %tmp, 1
Expand Down
Loading