From a8e7be5726bee76d95dae9e550c1ae4bd7f9da6d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 3 Jan 2025 12:34:41 +0700 Subject: [PATCH 1/2] RegisterCoalescer: Fix assert on remat to copy-to-physreg with subregs Do not try to rematerialize a super-register def used by a subregister extract copy into a copy to a physical register if the other pieces of the full physreg are live at the rematerialization point. It would insert the super-register def at the rematerialization point, and assert since the other half of the register was already live. This is analagous to the undef subregister def handling above, which handled the virtual register case. Fixes #120970 --- llvm/lib/CodeGen/RegisterCoalescer.cpp | 31 +++++-- ...xtract-already-live-at-def-issue120970.mir | 85 +++++++++++++++++++ 2 files changed, 111 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/remat-physreg-copy-subreg-extract-already-live-at-def-issue120970.mir diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 20ad6445344d8..c9e6ed03ed42f 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1325,11 +1325,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) return false; - // Only support subregister destinations when the def is read-undef. - MachineOperand &DstOperand = CopyMI->getOperand(0); - Register CopyDstReg = DstOperand.getReg(); - if (DstOperand.getSubReg() && !DstOperand.isUndef()) - return false; // If both SrcIdx and DstIdx are set, correct rematerialization would widen // the register substantially (beyond both source and dest size). This is bad @@ -1339,6 +1334,32 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (SrcIdx && DstIdx) return false; + // Only support subregister destinations when the def is read-undef. + MachineOperand &DstOperand = CopyMI->getOperand(0); + Register CopyDstReg = DstOperand.getReg(); + if (DstOperand.getSubReg() && !DstOperand.isUndef()) + return false; + + // Only support subregister destinations when the def is read-undef, in the + // physical register case. We're widening the def and need to avoid clobbering + // other live values in the unused register pieces. + // + // TODO: Targets may support rewriting the rematerialized instruction to only + // touch relevant lanes, in which case we don't need any liveness check. + if (CopyDstReg.isPhysical() && CP.isPartial()) { + for (MCRegUnit Unit : TRI->regunits(DstReg)) { + // Ignore the register units we are writing anyway. + if (is_contained(TRI->regunits(CopyDstReg), Unit)) + continue; + + // Check if the other lanes we are defining are live at the + // rematerialization point. + LiveRange &LR = LIS->getRegUnit(Unit); + if (LR.liveAt(CopyIdx)) + return false; + } + } + const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg(); const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { diff --git a/llvm/test/CodeGen/AMDGPU/remat-physreg-copy-subreg-extract-already-live-at-def-issue120970.mir b/llvm/test/CodeGen/AMDGPU/remat-physreg-copy-subreg-extract-already-live-at-def-issue120970.mir new file mode 100644 index 0000000000000..3879f6dccf9d6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/remat-physreg-copy-subreg-extract-already-live-at-def-issue120970.mir @@ -0,0 +1,85 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=register-coalescer -o - %s | FileCheck %s + +# This used to assert due to trying to rematerialize V_MOV_B64_PSEUDO +# at copy to $vgpr1. This would assert since this would clobber the +# live value in $vgpr0. + +--- +name: rematerialize_physreg_sub_def_already_live_at_def_assert +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: rematerialize_physreg_sub_def_already_live_at_def_assert + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec + ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: $vgpr1 = COPY [[V_MOV_B]].sub1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit killed $vgpr1 + %0:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec + %1:vgpr_32 = COPY %0.sub1 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + $vgpr1 = COPY %1 + SI_RETURN implicit $vgpr0, implicit killed $vgpr1 +... + +# Same as previous, except with an IMPLICIT_DEF +--- +name: rematerialize_physreg_sub_def_already_live_at_def_assert_implicit_def +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: rematerialize_physreg_sub_def_already_live_at_def_assert_implicit_def + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: $vgpr1 = COPY [[DEF]].sub1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit killed $vgpr1 + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + $vgpr1 = COPY %1 + SI_RETURN implicit $vgpr0, implicit killed $vgpr1 +... + +--- +name: rematerialize_physreg_sub_def_no_live_sub_def_0 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: rematerialize_physreg_sub_def_no_live_sub_def_0 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 1, implicit $exec, implicit-def $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit killed $vgpr1 + %0:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec + %1:vgpr_32 = COPY %0.sub1 + $vgpr1 = COPY %1 + SI_RETURN implicit killed $vgpr1 +... + +--- +name: rematerialize_physreg_sub_def_no_live_sub_def_1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: rematerialize_physreg_sub_def_no_live_sub_def_1 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $vgpr1_vgpr2 = V_MOV_B64_PSEUDO 1, implicit $exec, implicit-def $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit killed $vgpr1 + %0:vreg_64 = V_MOV_B64_PSEUDO 1, implicit $exec + %1:vgpr_32 = COPY %0.sub0 + $vgpr1 = COPY %1 + SI_RETURN implicit killed $vgpr1 +... From 9cef81d6c8a8f946d77b089ce0def7f913725bf1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 7 Jan 2025 08:42:05 +0700 Subject: [PATCH 2/2] Adjust comment --- llvm/lib/CodeGen/RegisterCoalescer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index c9e6ed03ed42f..7c108ce2c8323 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1340,9 +1340,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (DstOperand.getSubReg() && !DstOperand.isUndef()) return false; - // Only support subregister destinations when the def is read-undef, in the - // physical register case. We're widening the def and need to avoid clobbering - // other live values in the unused register pieces. + // In the physical register case, checking that the def is read-undef is not + // enough. We're widening the def and need to avoid clobbering other live + // values in the unused register pieces. // // TODO: Targets may support rewriting the rematerialized instruction to only // touch relevant lanes, in which case we don't need any liveness check.