diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 2e1f498c090d1..8dd1a0bf0837f 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1866,7 +1866,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. - if (MO.isUse() && !DstIsPhys) { + if (MO.isUse() && !MO.isUndef() && !DstIsPhys) { unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg()); if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-undef-subreg-use-invalid-lanemask.mir b/llvm/test/CodeGen/AMDGPU/coalescer-undef-subreg-use-invalid-lanemask.mir new file mode 100644 index 0000000000000..70b92831a0ecc --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/coalescer-undef-subreg-use-invalid-lanemask.mir @@ -0,0 +1,59 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s + +# Test that an invalid subreg range is not introduced due to the undef +# %1.sub0 use. An undef use with a subregister index would end up
# introducing subranges for the empty and full lanemasks.
+ +--- +name: merge_with_undef_subreg_use_subrange_lanemask_is_invalid +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: merge_with_undef_subreg_use_subrange_lanemask_is_invalid + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), addrspace 4) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[S_LOAD_DWORDX4_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]].sub1:sgpr_128 = COPY undef [[S_LOAD_DWORDX4_IMM]].sub0 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_LOAD_DWORDX4_IMM]] + bb.0: + liveins: $sgpr8_sgpr9 + + %0:sgpr_64 = COPY $sgpr8_sgpr9 + %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), addrspace 4) + + bb.1: + %2:sgpr_128 = COPY %1 + S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + S_BRANCH %bb.2 + + bb.2: + undef %3.sub0:sgpr_128 = S_MOV_B32 0 + %2:sgpr_128 = COPY killed %3 + + bb.3: + %4:sgpr_128 = COPY killed %2 + %4.sub1:sgpr_128 = COPY undef %1.sub0 + S_ENDPGM 0, implicit %4 + +...