Skip to content
Open
57 changes: 53 additions & 4 deletions llvm/lib/Target/AIE/AIESuperRegUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,26 @@ void rewriteFullCopy(MachineInstr &MI, const std::set<int> &CopySubRegs,
Register SrcReg = MI.getOperand(1).getReg();
LaneBitmask LiveSrcLanes = getLiveLanesAt(CopyIndex, SrcReg, LIS);

LIS.removeVRegDefAt(LIS.getInterval(DstReg), CopyIndex.getRegSlot());
if (!VRM.hasPhys(DstReg)) {
// FIXME: This pass may cause verification failures. The fix should
// be in the MachineVerifier. This is a very uncommon case where the
// destination register was not allocated yet.
// The machine verifier does not properly handle the semantics of:
// 1. **Partial register definitions with `undefined`**: When the first
// subregister is defined with `undefined`, it doesn't expect subsequent
// definitions to implicitly read that lane.
// 2. **Lane-based liveness for composite registers**: The verifier expects
// a continuous live range for the entire register, but with subregister
// definitions, different lanes have different live ranges that are being
// built up incrementally.
// 3. **Implicit reads in partial definitions**: The verifier doesn't
// recognize that `%18.sub_dim_size:ed = COPY ...` implicitly reads the
// previously defined `%18.sub_dim_count` lane.
MI.getMF()->getProperties().set(
MachineFunctionProperties::Property::FailsVerification);
}

MachineInstr *FirstMI = nullptr;
SmallSet<Register, 8> RegistersToRepair;
for (int SubRegIdx : CopySubRegs) {
if ((LiveSrcLanes & TRI.getSubRegIndexLaneMask(SubRegIdx)).none()) {
Expand All @@ -118,13 +136,31 @@ void rewriteFullCopy(MachineInstr &MI, const std::set<int> &CopySubRegs,
.addReg(DstReg, RegState::Define, SubRegIdx)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we do not need undefs here because we fully define the subregisters that we copy to?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we start the definition on the first copy, and we consider that next copies will read the previous one. In practice, inserting undefs in all copies will not hurt in general, but as is, we have a more accurate model for incremental definitions.

.addReg(SrcReg, 0, SubRegIdx)
.getInstr();

// Only set undefined on the first partial copy. The first copy doesn't read
// other lanes, but subsequent copies do read the previously written lanes.
// Setting undefined on all copies breaks live interval tracking and causes
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh yeah i also found that out with subreg spilling XD

// machine verifier errors.
if (!FirstMI) {
PartCopy->getOperand(0).setIsUndef();
FirstMI = PartCopy;
}
LLVM_DEBUG(dbgs() << " to " << *PartCopy);
LIS.InsertMachineInstrInMaps(*PartCopy);
LIS.getInterval(PartCopy->getOperand(0).getReg());
// We need to repair only the Src register. For the Dst register,
// we don't need to do anything explicit, because we will replace the
// original copy by the first lane copy in LIS. We avoid the explicit repair
// of Dst reg because LIS will create a exclusive range for each copy,
// because it considers that every sub-lane copy will make the preceding
// one dead, what is not true for composite registers.
// TODO: investigate why subregister liveness is being ignored by LIS
// at this point.
RegistersToRepair.insert(PartCopy->getOperand(1).getReg());
}

LIS.RemoveMachineInstrFromMaps(MI);
// Replace the original copy by the first one, so we automatically repair
// DstReg's LI.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

LIS.ReplaceMachineInstrInMaps(MI, *FirstMI);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why aren't we inserting all the newly created machineinstructions into LIS?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because, later on, each new lane copy will make dead the previous one if we try to fix the LI. In this case, we say that the LR will start on the first copy and then it will simply continue though the next ones, until the next last use. I this way we prevent false dead flags everywhere.

MI.eraseFromParent();
// As we don't handle all registers now (selective LI filter),
// We should make sure that all LiveIntervals are correct.
Expand Down Expand Up @@ -184,8 +220,17 @@ void rewriteSuperReg(Register Reg, Register AssignedPhysReg,

// There might have been a write-undefined due to only writing one sub-lane.
// Now that each sub-lane has its own VReg, the qualifier is invalid.
if (RegOp.isDef())
if (RegOp.isDef()) {
RegOp.setIsUndef(false);
// Also unset correctly the dead flag if the instruction
// is not the dead slot in the live range (the def is still alive).
LiveInterval &LI = LIS.getInterval(Reg);
MachineInstr *DefMI = RegOp.getParent();
SlotIndex Def = LIS.getInstructionIndex(*DefMI);
LiveRange::iterator I = LI.FindSegmentContaining(Def);
if (I->end != Def.getDeadSlot())
RegOp.setIsDead(false);
}

// Make sure the right reg class is applied, some MIs might use compound
// classes with both 20 and 32 bits registers.
Expand Down Expand Up @@ -259,6 +304,10 @@ void repairLiveIntervals(SmallSet<Register, 8> &RegistersToRepair,
LIS.removeInterval(R);
LIS.createAndComputeVirtRegInterval(R);
}

// After recomputing, shrink the interval to remove any invalid segments
// This is important for registers with undefined definitions.
LIS.shrinkToUses(&LIS.getInterval(R));
}
}

Expand Down
91 changes: 84 additions & 7 deletions llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-check-undef.mir
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5

# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
#
# RUN: not --crash llc -O2 -mtriple=aie2p -start-before=greedy \
# RUN: -stop-before=aie-unallocated-superreg-rewrite -o /dev/null %s 2>&1 | FileCheck %s
# RUN: llc -O2 -mtriple=aie2p -start-before=greedy \
# RUN: -stop-before=aie-unallocated-superreg-rewrite -verify-machineinstrs %s -o - | FileCheck %s

# The goal of this test is to check if we properly insert undef flag on the def side
Copy link
Collaborator Author

@andcarminati andcarminati Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could extend the verifier to cover partial definitions. It is far away from be trivial, but it could be done.

# of a expanded full copy. On a sub-register def operand, it refers to the part of the
Expand All @@ -15,15 +16,91 @@
# force the related register to be inserted in liveout set of the predecessors block,
# causing dominance problems.

# CHECK: LLVM ERROR: Found 1 machine code errors

---
name: use_all_2d_regs
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: use_all_2d_regs
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_stride:ed = MOV_PD_imm11_pseudo 1
; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_mod:ed = MOV_PD_imm11_pseudo 0
; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edn = MOV_PD_imm11_pseudo -1
; CHECK-NEXT: [[COPY:%[0-9]+]]:em = COPY [[MOV_PD_imm11_pseudo]].sub_dim_stride
; CHECK-NEXT: [[COPY1:%[0-9]+]]:edj = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY3:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY4:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY5:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY6:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_size:ed = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY7:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY8:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY9:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY10:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY11:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY12:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY13:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_count:ed = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: [[COPY14:%[0-9]+]]:ed = COPY [[MOV_PD_imm11_pseudo]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo2:%[0-9]+]]:ep, [[COPY7:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo2]], [[COPY]], [[MOV_PD_imm11_pseudo1]], [[COPY1]], [[COPY7]]
; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
; CHECK-NEXT: [[COPY16:%[0-9]+]]:em = COPY [[COPY]]
; CHECK-NEXT: [[COPY17:%[0-9]+]]:edj = COPY [[COPY1]]
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep, [[COPY10:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo3]], [[COPY16]], [[COPY2]], [[COPY17]], [[COPY10]]
; CHECK-NEXT: [[MOV_PD_imm11_pseudo7:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
; CHECK-NEXT: [[COPY18:%[0-9]+]]:em = COPY [[COPY]]
; CHECK-NEXT: [[COPY19:%[0-9]+]]:edj = COPY [[COPY1]]
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep, [[COPY8:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo4]], [[COPY18]], [[COPY3]], [[COPY19]], [[COPY8]]
; CHECK-NEXT: [[COPY20:%[0-9]+]]:em = COPY [[COPY]]
; CHECK-NEXT: [[COPY21:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo1]]
; CHECK-NEXT: [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
; CHECK-NEXT: [[COPY22:%[0-9]+]]:edj = COPY [[COPY1]]
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep, [[COPY11:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo5]], [[COPY20]], [[COPY21]], [[COPY22]], [[COPY11]]
; CHECK-NEXT: [[COPY23:%[0-9]+]]:em = COPY [[COPY]]
; CHECK-NEXT: [[COPY24:%[0-9]+]]:edj = COPY [[COPY1]]
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep, [[COPY9:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo6]], [[COPY23]], [[COPY4]], [[COPY24]], [[COPY9]]
; CHECK-NEXT: [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
; CHECK-NEXT: [[COPY25:%[0-9]+]]:em = COPY [[COPY]]
; CHECK-NEXT: [[COPY26:%[0-9]+]]:edj = COPY [[COPY1]]
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo7:%[0-9]+]]:ep, [[COPY12:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo7]], [[COPY25]], [[COPY5]], [[COPY26]], [[COPY12]]
; CHECK-NEXT: undef [[COPY27:%[0-9]+]].sub_dim_count:ed = COPY [[COPY10]] {
; CHECK-NEXT: internal [[COPY27]].sub_dim_size:ed = COPY [[COPY2]]
; CHECK-NEXT: }
; CHECK-NEXT: [[COPY28:%[0-9]+]]:edc = COPY [[COPY14]].sub_dim_count
; CHECK-NEXT: [[COPY29:%[0-9]+]]:edn = COPY [[COPY14]].sub_dim_size
; CHECK-NEXT: [[COPY30:%[0-9]+]]:edj = COPY [[COPY14]].sub_dim_stride
; CHECK-NEXT: [[COPY31:%[0-9]+]]:em = COPY [[COPY14]].sub_mod
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep, [[COPY28:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo9]], [[COPY31]], [[COPY29]], [[COPY30]], [[COPY28]]
; CHECK-NEXT: [[COPY32:%[0-9]+]]:em = COPY [[COPY]]
; CHECK-NEXT: [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
; CHECK-NEXT: [[COPY33:%[0-9]+]]:edj = COPY [[COPY1]]
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep, [[COPY13:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo8]], [[COPY32]], [[COPY6]], [[COPY33]], [[COPY13]]
; CHECK-NEXT: [[COPY34:%[0-9]+]]:edn = COPY [[COPY31]]
; CHECK-NEXT: undef [[COPY14:%[0-9]+]].sub_dim_count:ed = COPY [[COPY28]]
; CHECK-NEXT: [[COPY14:%[0-9]+]].sub_dim_size:ed = COPY [[COPY29]]
; CHECK-NEXT: [[COPY14:%[0-9]+]].sub_dim_stride:ed = COPY [[COPY30]]
; CHECK-NEXT: [[COPY14:%[0-9]+]].sub_mod:ed = COPY [[COPY31]]
; CHECK-NEXT: [[COPY35:%[0-9]+]]:em = COPY [[COPY31]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:edc = COPY [[COPY27]].sub_dim_count {
; CHECK-NEXT: internal [[COPY2]]:edn = COPY [[COPY27]].sub_dim_size
; CHECK-NEXT: }
; CHECK-NEXT: [[COPY36:%[0-9]+]]:edj = COPY [[COPY1]]
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep, [[COPY15:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo10]], [[COPY35]], [[COPY34]], [[COPY36]], [[COPY15]]
; CHECK-NEXT: PseudoJ_jump_imm %bb.1
bb.0:
successors: %bb.1(0x80000000)

undef %80.sub_dim_stride:ed = MOV_PD_imm11_pseudo 1
%80.sub_mod:ed = MOV_PD_imm11_pseudo 0
undef %105.sub_dim_size:ed = MOV_PD_imm11_pseudo -1
Expand All @@ -44,10 +121,10 @@ body: |
%82.sub_dim_count:ed = COPY %80.sub_mod
%80.sub_dim_count:ed = COPY %80.sub_mod
undef %77.sub_dim_count:ed = COPY %80.sub_mod

bb.1:
successors: %bb.1(0x80000000)

%10:ep = MOV_PD_imm11_pseudo 0
%18:ep = MOV_PD_imm11_pseudo 0
%22:ep = MOV_PD_imm11_pseudo 0
Expand Down