Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion llvm/lib/CodeGen/MachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -955,9 +955,37 @@ void ScheduleDAGMI::moveInstruction(
BB->splice(InsertPos, BB, MI);

// Update LiveIntervals
if (LIS)
if (LIS) {
LIS->handleMove(*MI, /*UpdateFlags=*/true);

// Mark the moved instruction's sub-register definition as undef if needed,
// and clear isUndef from the following definition of the same register.
for (MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.isDef() && MO.getSubReg() != 0) {
SlotIndex Index = LIS->getInstructionIndex(*MI);
LiveInterval &LI = LIS->getInterval(MO.getReg());
// If the register isn't live prior to the SubReg def and the def is not
// already marked as Undef, add the Undef flag.
if (!LI.liveAt(Index) && !MO.isUndef()) {
MO.setIsUndef(true);
LiveRange::iterator NextSeg = std::next(LI.find(Index));
if (NextSeg != LI.end() && NextSeg->valno) {
if (MachineInstr *NextMI =
LIS->getInstructionFromIndex(NextSeg->valno->def)) {
// Remove the Undef flag from the next def of this register in the
// LiveInterval, since the moved instruction already marks the
// SubRegs as Undef.
if (MachineOperand *NextMO =
NextMI->findRegisterDefOperand(MO.getReg(), TRI)) {
NextMO->setIsUndef(false);
}
}
}
}
}
}
}

// Recede RegionBegin if an instruction moves above the first.
if (RegionBegin == InsertPos)
RegionBegin = MI;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
Expand Down Expand Up @@ -2795,7 +2795,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
Expand Down Expand Up @@ -2960,7 +2960,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
Expand Down Expand Up @@ -3138,7 +3138,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
Expand Down Expand Up @@ -3328,7 +3328,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
Expand Down Expand Up @@ -6353,7 +6353,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: dead [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
Expand Down Expand Up @@ -6553,7 +6553,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF31]], implicit $exec, implicit $mode
; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
; GFX908-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: dead [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
Expand Down
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/AMDGPU/machine-scheduler-undef-reorder.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX900 %s

---
name: test_undef_reorder
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
body: |
bb.0:
liveins: $vgpr2
; GFX900-LABEL: name: test_undef_reorder
; GFX900: liveins: $vgpr2
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: $vgpr5 = COPY $vgpr2
; GFX900-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr5
; GFX900-NEXT: $vgpr4 = COPY $vgpr2
; GFX900-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr4
; GFX900-NEXT: dead [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
; GFX900-NEXT: S_ENDPGM 0
$vgpr5 = COPY $vgpr2
$vgpr4 = COPY $vgpr2
undef %18.sub1:vreg_64 = COPY killed $vgpr4
%18.sub0:vreg_64 = COPY killed $vgpr5
%19: vreg_64 = COPY %18
S_ENDPGM 0
...
15 changes: 7 additions & 8 deletions llvm/test/CodeGen/AMDGPU/shufflevector-physreg-copy.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; FIXME: Fails expensive checks, should re-enable verifier, see issue #130884
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs=0 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs=0 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs=0 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s

; Test that we can form v_pk_mov_b32 in certain shuffles when they
; originate from 32-bit physreg copy sequences.
Expand Down Expand Up @@ -735,10 +734,10 @@ define i32 @shufflevector_v4i32_3210_physreg_even_vgpr_quad_copy_other_use_elt(p
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v4, v5, v6, v7
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v9, v5
; GFX900-NEXT: v_mov_b32_e32 v8, v6
; GFX900-NEXT: v_mov_b32_e32 v10, v4
; GFX900-NEXT: global_store_dwordx4 v0, v[7:10], s[16:17]
; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: v_mov_b32_e32 v2, v6
; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx4 v0, v[1:4], s[16:17]
; GFX900-NEXT: v_mov_b32_e32 v0, v6
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
Expand Down