Skip to content

Commit 774e6ea

Browse files
[AIEX] Add missing undef flags in copy expansion
This properly handles the use of non-dominating definitions. We also change the handling of the destination registers in two parts: *Copy expansion: we replace the original index with the index of the first lane copy to avoid creating LRs with just one instruction; in this way we keep the LI correct. *Rewrite: reset dead flags if necessary. Co-Authored-By: Krishnam Tibrewala <[email protected]>
1 parent 624d65f commit 774e6ea

File tree

2 files changed

+137
-11
lines changed

2 files changed

+137
-11
lines changed

llvm/lib/Target/AIE/AIESuperRegUtils.cpp

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,26 @@ void rewriteFullCopy(MachineInstr &MI, const std::set<int> &CopySubRegs,
103103
Register SrcReg = MI.getOperand(1).getReg();
104104
LaneBitmask LiveSrcLanes = getLiveLanesAt(CopyIndex, SrcReg, LIS);
105105

106-
LIS.removeVRegDefAt(LIS.getInterval(DstReg), CopyIndex.getRegSlot());
106+
if (!VRM.hasPhys(DstReg)) {
107+
// FIXME: This pass may cause verification failures. The fix should
108+
// be in the MachineVerifier. This is a very uncommon case where the
109+
// destination register was not allocated yet.
110+
// The machine verifier does not properly handle the semantics of:
111+
// 1. **Partial register definitions with `undefined`**: When the first
112+
// subregister is defined with `undefined`, it doesn't expect subsequent
113+
// definitions to implicitly read that lane.
114+
// 2. **Lane-based liveness for composite registers**: The verifier expects
115+
// a continuous live range for the entire register, but with subregister
116+
// definitions, different lanes have different live ranges that are being
117+
// built up incrementally.
118+
// 3. **Implicit reads in partial definitions**: The verifier doesn't
119+
// recognize that `%18.sub_dim_size:ed = COPY ...` implicitly reads the
120+
// previously defined `%18.sub_dim_count` lane.
121+
MI.getMF()->getProperties().set(
122+
MachineFunctionProperties::Property::FailsVerification);
123+
}
107124

125+
MachineInstr *FirstMI = nullptr;
108126
SmallSet<Register, 8> RegistersToRepair;
109127
for (int SubRegIdx : CopySubRegs) {
110128
if ((LiveSrcLanes & TRI.getSubRegIndexLaneMask(SubRegIdx)).none()) {
@@ -118,13 +136,31 @@ void rewriteFullCopy(MachineInstr &MI, const std::set<int> &CopySubRegs,
118136
.addReg(DstReg, RegState::Define, SubRegIdx)
119137
.addReg(SrcReg, 0, SubRegIdx)
120138
.getInstr();
139+
140+
// Only set undefined on the first partial copy. The first copy doesn't read
141+
// other lanes, but subsequent copies do read the previously written lanes.
142+
// Setting undefined on all copies breaks live interval tracking and causes
143+
// machine verifier errors.
144+
if (!FirstMI) {
145+
PartCopy->getOperand(0).setIsUndef();
146+
FirstMI = PartCopy;
147+
}
121148
LLVM_DEBUG(dbgs() << " to " << *PartCopy);
122149
LIS.InsertMachineInstrInMaps(*PartCopy);
123-
LIS.getInterval(PartCopy->getOperand(0).getReg());
150+
// We need to repair only the Src register. For the Dst register,
151+
// we don't need to do anything explicit, because we will replace the
152+
// original copy by the first lane copy in LIS. We avoid the explicit repair
153+
// of Dst reg because LIS will create an exclusive range for each copy,
154+
// because it considers that every sub-lane copy will make the preceding
155+
// one dead, which is not true for composite registers.
156+
// TODO: investigate why subregister liveness is being ignored by LIS
157+
// at this point.
124158
RegistersToRepair.insert(PartCopy->getOperand(1).getReg());
125159
}
126160

127-
LIS.RemoveMachineInstrFromMaps(MI);
161+
// Replace the original copy by the first one, so we automatically repair
162+
// DstReg's LI.
163+
LIS.ReplaceMachineInstrInMaps(MI, *FirstMI);
128164
MI.eraseFromParent();
129165
// As we don't handle all registers now (selective LI filter),
130166
// We should make sure that all LiveIntervals are correct.
@@ -184,8 +220,17 @@ void rewriteSuperReg(Register Reg, Register AssignedPhysReg,
184220

185221
// There might have been a write-undefined due to only writing one sub-lane.
186222
// Now that each sub-lane has its own VReg, the qualifier is invalid.
187-
if (RegOp.isDef())
223+
if (RegOp.isDef()) {
188224
RegOp.setIsUndef(false);
225+
// Also unset correctly the dead flag if the instruction
226+
// is not the dead slot in the live range (the def is still alive).
227+
LiveInterval &LI = LIS.getInterval(Reg);
228+
MachineInstr *DefMI = RegOp.getParent();
229+
SlotIndex Def = LIS.getInstructionIndex(*DefMI);
230+
LiveRange::iterator I = LI.FindSegmentContaining(Def);
231+
if (I->end != Def.getDeadSlot())
232+
RegOp.setIsDead(false);
233+
}
189234

190235
// Make sure the right reg class is applied, some MIs might use compound
191236
// classes with both 20 and 32 bits registers.
@@ -259,6 +304,10 @@ void repairLiveIntervals(SmallSet<Register, 8> &RegistersToRepair,
259304
LIS.removeInterval(R);
260305
LIS.createAndComputeVirtRegInterval(R);
261306
}
307+
308+
// After recomputing, shrink the interval to remove any invalid segments
309+
// This is important for registers with undefined definitions.
310+
LIS.shrinkToUses(&LIS.getInterval(R));
262311
}
263312
}
264313

llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-check-undef.mir

Lines changed: 84 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
12

23
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
34
# See https://llvm.org/LICENSE.txt for license information.
45
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
56
#
67
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
78
#
8-
# RUN: not --crash llc -O2 -mtriple=aie2p -start-before=greedy \
9-
# RUN: -stop-before=aie-unallocated-superreg-rewrite -o /dev/null %s 2>&1 | FileCheck %s
9+
# RUN: llc -O2 -mtriple=aie2p -start-before=greedy \
10+
# RUN: -stop-before=aie-unallocated-superreg-rewrite -verify-machineinstrs %s -o - | FileCheck %s
1011

1112
# The goal of this test is to check if we properly insert undef flag on the def side
1213
# of an expanded full copy. On a sub-register def operand, it refers to the part of the
@@ -15,15 +16,91 @@
1516
# force the related register to be inserted in liveout set of the predecessors block,
1617
# causing dominance problems.
1718

18-
# CHECK: LLVM ERROR: Found 1 machine code errors
19-
2019
---
2120
name: use_all_2d_regs
2221
tracksRegLiveness: true
2322
body: |
23+
; CHECK-LABEL: name: use_all_2d_regs
24+
; CHECK: bb.0:
25+
; CHECK-NEXT: successors: %bb.1(0x80000000)
26+
; CHECK-NEXT: {{ $}}
27+
; CHECK-NEXT: undef [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_stride:ed = MOV_PD_imm11_pseudo 1
28+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_mod:ed = MOV_PD_imm11_pseudo 0
29+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edn = MOV_PD_imm11_pseudo -1
30+
; CHECK-NEXT: [[COPY:%[0-9]+]]:em = COPY [[MOV_PD_imm11_pseudo]].sub_dim_stride
31+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:edj = COPY [[MOV_PD_imm11_pseudo]].sub_mod
32+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
33+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
34+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
35+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
36+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo]].sub_mod
37+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_size:ed = COPY [[MOV_PD_imm11_pseudo]].sub_mod
38+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
39+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
40+
; CHECK-NEXT: [[COPY9:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
41+
; CHECK-NEXT: [[COPY10:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
42+
; CHECK-NEXT: [[COPY11:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
43+
; CHECK-NEXT: [[COPY12:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
44+
; CHECK-NEXT: [[COPY13:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
45+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_count:ed = COPY [[MOV_PD_imm11_pseudo]].sub_mod
46+
; CHECK-NEXT: [[COPY14:%[0-9]+]]:ed = COPY [[MOV_PD_imm11_pseudo]]
47+
; CHECK-NEXT: [[COPY15:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]].sub_mod
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: bb.1:
50+
; CHECK-NEXT: successors: %bb.1(0x80000000)
51+
; CHECK-NEXT: {{ $}}
52+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
53+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
54+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
55+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
56+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo2:%[0-9]+]]:ep, [[COPY7:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo2]], [[COPY]], [[MOV_PD_imm11_pseudo1]], [[COPY1]], [[COPY7]]
57+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
58+
; CHECK-NEXT: [[COPY16:%[0-9]+]]:em = COPY [[COPY]]
59+
; CHECK-NEXT: [[COPY17:%[0-9]+]]:edj = COPY [[COPY1]]
60+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep, [[COPY10:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo3]], [[COPY16]], [[COPY2]], [[COPY17]], [[COPY10]]
61+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo7:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
62+
; CHECK-NEXT: [[COPY18:%[0-9]+]]:em = COPY [[COPY]]
63+
; CHECK-NEXT: [[COPY19:%[0-9]+]]:edj = COPY [[COPY1]]
64+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep, [[COPY8:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo4]], [[COPY18]], [[COPY3]], [[COPY19]], [[COPY8]]
65+
; CHECK-NEXT: [[COPY20:%[0-9]+]]:em = COPY [[COPY]]
66+
; CHECK-NEXT: [[COPY21:%[0-9]+]]:edn = COPY [[MOV_PD_imm11_pseudo1]]
67+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
68+
; CHECK-NEXT: [[COPY22:%[0-9]+]]:edj = COPY [[COPY1]]
69+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep, [[COPY11:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo5]], [[COPY20]], [[COPY21]], [[COPY22]], [[COPY11]]
70+
; CHECK-NEXT: [[COPY23:%[0-9]+]]:em = COPY [[COPY]]
71+
; CHECK-NEXT: [[COPY24:%[0-9]+]]:edj = COPY [[COPY1]]
72+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep, [[COPY9:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo6]], [[COPY23]], [[COPY4]], [[COPY24]], [[COPY9]]
73+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
74+
; CHECK-NEXT: [[COPY25:%[0-9]+]]:em = COPY [[COPY]]
75+
; CHECK-NEXT: [[COPY26:%[0-9]+]]:edj = COPY [[COPY1]]
76+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo7:%[0-9]+]]:ep, [[COPY12:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo7]], [[COPY25]], [[COPY5]], [[COPY26]], [[COPY12]]
77+
; CHECK-NEXT: undef [[COPY27:%[0-9]+]].sub_dim_count:ed = COPY [[COPY10]] {
78+
; CHECK-NEXT: internal [[COPY27]].sub_dim_size:ed = COPY [[COPY2]]
79+
; CHECK-NEXT: }
80+
; CHECK-NEXT: [[COPY28:%[0-9]+]]:edc = COPY [[COPY14]].sub_dim_count
81+
; CHECK-NEXT: [[COPY29:%[0-9]+]]:edn = COPY [[COPY14]].sub_dim_size
82+
; CHECK-NEXT: [[COPY30:%[0-9]+]]:edj = COPY [[COPY14]].sub_dim_stride
83+
; CHECK-NEXT: [[COPY31:%[0-9]+]]:em = COPY [[COPY14]].sub_mod
84+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep, [[COPY28:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo9]], [[COPY31]], [[COPY29]], [[COPY30]], [[COPY28]]
85+
; CHECK-NEXT: [[COPY32:%[0-9]+]]:em = COPY [[COPY]]
86+
; CHECK-NEXT: [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0
87+
; CHECK-NEXT: [[COPY33:%[0-9]+]]:edj = COPY [[COPY1]]
88+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep, [[COPY13:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo8]], [[COPY32]], [[COPY6]], [[COPY33]], [[COPY13]]
89+
; CHECK-NEXT: [[COPY34:%[0-9]+]]:edn = COPY [[COPY31]]
90+
; CHECK-NEXT: undef [[COPY14:%[0-9]+]].sub_dim_count:ed = COPY [[COPY28]]
91+
; CHECK-NEXT: [[COPY14:%[0-9]+]].sub_dim_size:ed = COPY [[COPY29]]
92+
; CHECK-NEXT: [[COPY14:%[0-9]+]].sub_dim_stride:ed = COPY [[COPY30]]
93+
; CHECK-NEXT: [[COPY14:%[0-9]+]].sub_mod:ed = COPY [[COPY31]]
94+
; CHECK-NEXT: [[COPY35:%[0-9]+]]:em = COPY [[COPY31]]
95+
; CHECK-NEXT: [[COPY10:%[0-9]+]]:edc = COPY [[COPY27]].sub_dim_count {
96+
; CHECK-NEXT: internal [[COPY2]]:edn = COPY [[COPY27]].sub_dim_size
97+
; CHECK-NEXT: }
98+
; CHECK-NEXT: [[COPY36:%[0-9]+]]:edj = COPY [[COPY1]]
99+
; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep, [[COPY15:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo10]], [[COPY35]], [[COPY34]], [[COPY36]], [[COPY15]]
100+
; CHECK-NEXT: PseudoJ_jump_imm %bb.1
24101
bb.0:
25102
successors: %bb.1(0x80000000)
26-
103+
27104
undef %80.sub_dim_stride:ed = MOV_PD_imm11_pseudo 1
28105
%80.sub_mod:ed = MOV_PD_imm11_pseudo 0
29106
undef %105.sub_dim_size:ed = MOV_PD_imm11_pseudo -1
@@ -44,10 +121,10 @@ body: |
44121
%82.sub_dim_count:ed = COPY %80.sub_mod
45122
%80.sub_dim_count:ed = COPY %80.sub_mod
46123
undef %77.sub_dim_count:ed = COPY %80.sub_mod
47-
124+
48125
bb.1:
49126
successors: %bb.1(0x80000000)
50-
127+
51128
%10:ep = MOV_PD_imm11_pseudo 0
52129
%18:ep = MOV_PD_imm11_pseudo 0
53130
%22:ep = MOV_PD_imm11_pseudo 0

0 commit comments

Comments
 (0)