diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 20ad6445344d8..7c108ce2c8323 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1325,11 +1325,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) return false; - // Only support subregister destinations when the def is read-undef. - MachineOperand &DstOperand = CopyMI->getOperand(0); - Register CopyDstReg = DstOperand.getReg(); - if (DstOperand.getSubReg() && !DstOperand.isUndef()) - return false; // If both SrcIdx and DstIdx are set, correct rematerialization would widen // the register substantially (beyond both source and dest size). This is bad @@ -1339,6 +1334,32 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (SrcIdx && DstIdx) return false; + // Only support subregister destinations when the def is read-undef. + MachineOperand &DstOperand = CopyMI->getOperand(0); + Register CopyDstReg = DstOperand.getReg(); + if (DstOperand.getSubReg() && !DstOperand.isUndef()) + return false; + + // In the physical register case, checking that the def is read-undef is not + // enough. We're widening the def and need to avoid clobbering other live + // values in the unused register pieces. + // + // TODO: Targets may support rewriting the rematerialized instruction to only + // touch relevant lanes, in which case we don't need any liveness check. + if (CopyDstReg.isPhysical() && CP.isPartial()) { + for (MCRegUnit Unit : TRI->regunits(DstReg)) { + // Ignore the register units we are writing anyway. + if (is_contained(TRI->regunits(CopyDstReg), Unit)) + continue; + + // Check if the other lanes we are defining are live at the + // rematerialization point. + LiveRange &LR = LIS->getRegUnit(Unit); + if (LR.liveAt(CopyIdx)) + return false; + } + } + const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg(); const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { diff --git a/llvm/test/CodeGen/AMDGPU/remat-physreg-copy-subreg-extract-already-live-at-def-issue120970.mir b/llvm/test/CodeGen/AMDGPU/remat-physreg-copy-subreg-extract-already-live-at-def-issue120970.mir new file mode 100644 index 0000000000000..3879f6dccf9d6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/remat-physreg-copy-subreg-extract-already-live-at-def-issue120970.mir @@ -0,0 +1,85 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=register-coalescer -o - %s | FileCheck %s + +# This used to assert due to trying to rematerialize V_MOV_B64_PSEUDO +# at copy to $vgpr1. This would assert since this would clobber the +# live value in $vgpr0. + +--- +name: rematerialize_physreg_sub_def_already_live_at_def_assert +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: rematerialize_physreg_sub_def_already_live_at_def_assert + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec + ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: $vgpr1 = COPY [[V_MOV_B]].sub1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit killed $vgpr1 + %0:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec + %1:vgpr_32 = COPY %0.sub1 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + $vgpr1 = COPY %1 + SI_RETURN implicit $vgpr0, implicit killed $vgpr1 +... + +# Same as previous, except with an IMPLICIT_DEF +--- +name: rematerialize_physreg_sub_def_already_live_at_def_assert_implicit_def +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: rematerialize_physreg_sub_def_already_live_at_def_assert_implicit_def + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: $vgpr1 = COPY [[DEF]].sub1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit killed $vgpr1 + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + $vgpr1 = COPY %1 + SI_RETURN implicit $vgpr0, implicit killed $vgpr1 +... + +--- +name: rematerialize_physreg_sub_def_no_live_sub_def_0 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: rematerialize_physreg_sub_def_no_live_sub_def_0 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 1, implicit $exec, implicit-def $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit killed $vgpr1 + %0:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec + %1:vgpr_32 = COPY %0.sub1 + $vgpr1 = COPY %1 + SI_RETURN implicit killed $vgpr1 +... + +--- +name: rematerialize_physreg_sub_def_no_live_sub_def_1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: rematerialize_physreg_sub_def_no_live_sub_def_1 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $vgpr1_vgpr2 = V_MOV_B64_PSEUDO 1, implicit $exec, implicit-def $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit killed $vgpr1 + %0:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec + %1:vgpr_32 = COPY %0.sub0 + $vgpr1 = COPY %1 + SI_RETURN implicit killed $vgpr1 +...