Skip to content

Commit 64e5db9

Browse files
committed
[AIEX] added full search for global combiners
1 parent 2b363fa commit 64e5db9

26 files changed

+835
-789
lines changed

llvm/lib/Target/AIE/AIEGlobalCombiner.cpp

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,94 @@ std::vector<const GenericCombiner *> CombineCandidates::searchCombinerSet(
211211
// seed greedy solution
212212
auto BestSolution = getGreedySolution();
213213

214+
std::priority_queue<CombinerSolution> PQ;
215+
// Add Start Position
216+
PQ.emplace(NumCombiner);
217+
LLVM_DEBUG(dbgs() << MBB->getName() << " - Initial Search Start "
218+
<< BestSolution << "\n");
219+
220+
// The search algorithm finds the optimal combination of combiners to
221+
// maximize gain. It starts with a greedy solution and iteratively
222+
// explores the solution space using a priority queue.
223+
//
224+
// The Index within the CombinerSolution represents the index of the next
225+
// combiner in the Combiners vector to consider. The Index is updated when a
226+
// new CombinerSolution is created and pushed onto the priority queue, either
227+
// by including the current combiner or by skipping it; in both cases the
228+
// Index increments by 1. A combiner is only included when it does not
229+
// conflict with those already applied, so solutions stay conflict free.
230+
//
231+
// The search prunes branches that cannot surpass the current best solution,
232+
// and prioritizes candidates based on their potential future gain. The
233+
// algorithm considers both applying and skipping each combiner to find the
234+
// best overall combination. The priority queue ensures that we explore
235+
// promising solutions first. When popping an element of the priority queue,
236+
// we also remove it from the queue, thereby guaranteeing we don't search the
237+
// same solution root twice. The algorithm is exhaustive, limited only by the
238+
// maximum number of iterations.
239+
int Iteration = 0;
240+
while (!PQ.empty() && Iteration < MaxSearchIterationCount) {
241+
Iteration++;
242+
243+
// Get best Candidate to continue searching
244+
const CombinerSolution Current = PQ.top();
245+
PQ.pop();
246+
LLVM_DEBUG(dbgs() << "Search " << Current
247+
<< " MaxGain = " << Current.getMaxFutureGain() << "\n");
248+
249+
// Check if Current has finished the search
250+
if (Current.getIndex() == Combiners.size()) {
251+
if (Current.getGain() > BestSolution.getGain()) {
252+
LLVM_DEBUG(dbgs() << " [Search] Updated Optimal Combiner " << Current
253+
<< "\n");
254+
BestSolution = Current;
255+
}
256+
continue;
257+
}
258+
259+
const GenericCombiner *Candidate = Combiners[Current.getIndex()];
260+
261+
// Check if search can be stopped for Current
262+
const auto PotentialGain = getMaxPotentialGain(Current, Current.getIndex());
263+
if (BestSolution.getGain() > PotentialGain) {
264+
LLVM_DEBUG(
265+
dbgs() << " [Search] Cannot Surpass BestScore, skipping exploration "
266+
<< *Candidate << " Candidate: " << Candidate->getGain()
267+
<< " Overlap= "
268+
<< Candidate->getOverlapGain(Current.getCombinersBitVector(),
269+
Combiners)
270+
<< " MaxGain = " << PotentialGain << "\n");
271+
continue;
272+
}
273+
274+
if (Current.hasConflict(Candidate)) {
275+
LLVM_DEBUG(dbgs() << " Conflict, adding Non-Conflict Variant "
276+
<< *Candidate << "Candidate: " << Candidate->getGain()
277+
<< "\n");
278+
} else {
279+
PQ.emplace(Current, Candidate, PotentialGain, Current.getIndex(),
280+
Combiners);
281+
LLVM_DEBUG(dbgs() << " Adding to Stack: " << *Candidate
282+
<< "Candidate: " << Candidate->getGain()
283+
<< " MaxGain = " << PotentialGain << "\n");
284+
}
285+
286+
// Add a solution that skips the current candidate, if it could still
287+
// surpass BestSolution
288+
const auto NoneMaxFutureGain =
289+
getMaxPotentialGain(Current, Current.getIndex() + 1);
290+
if (BestSolution.getGain() < NoneMaxFutureGain) {
291+
LLVM_DEBUG(dbgs() << " Adding to Stack: None MaxGain = "
292+
<< NoneMaxFutureGain << "\n");
293+
// Idx is not needed, since we do not apply any Combiner
294+
PQ.emplace(Current, nullptr, NoneMaxFutureGain, /*Idx=*/-1, Combiners);
295+
}
296+
} // end while
297+
298+
LLVM_DEBUG(dbgs() << "Search Iterations: " << Iteration << "\n");
299+
LLVM_DEBUG(dbgs() << "Search Result " << BestSolution.getGain() << "\n");
300+
301+
// Save best Candidate to FixedCombiners
214302
std::vector<const GenericCombiner *> Result;
215303
BitVector CombinerBitVec = BestSolution.getCombinersBitVector();
216304
for (int Idx = CombinerBitVec.find_first(); Idx != -1;

llvm/test/CodeGen/AIE/GlobalISel/legalize-dyn-stackalloc.ll

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -246,36 +246,37 @@ define void @test_huge_stack(i32 noundef %n) #0 {
246246
; AIE2-NEXT: padda [p7], m0
247247
; AIE2-NEXT: movxm m0, #-40048
248248
; AIE2-NEXT: mov p0, p7
249+
; AIE2-NEXT: mov p3, p7
249250
; AIE2-NEXT: mov p2, p7
250251
; AIE2-NEXT: mov p6, p7
252+
; AIE2-NEXT: paddb [p3], #-32
251253
; AIE2-NEXT: paddb [p0], m0
252-
; AIE2-NEXT: paddb [p6], #-32
253-
; AIE2-NEXT: movxm m0, #-40032
254254
; AIE2-NEXT: st r0, [p0, #0]
255255
; AIE2-NEXT: lda r0, [p0, #0]
256-
; AIE2-NEXT: paddb [p2], m0
256+
; AIE2-NEXT: paddb [p2], #-24
257+
; AIE2-NEXT: mov r16, p3
258+
; AIE2-NEXT: st r1, [p2, #4]
257259
; AIE2-NEXT: mov p0, sp
258-
; AIE2-NEXT: mov r16, p2
259-
; AIE2-NEXT: st p0, [p6, #0]
260-
; AIE2-NEXT: mov p0, p7
261-
; AIE2-NEXT: paddb [p0], #-24
260+
; AIE2-NEXT: st p0, [p3, #0]
261+
; AIE2-NEXT: mov p0, p1
262262
; AIE2-NEXT: lshl r2, r0, r2
263-
; AIE2-NEXT: st r0, [p0], #4
263+
; AIE2-NEXT: st r0, [p2, #0]
264264
; AIE2-NEXT: add r2, r2, #31
265-
; AIE2-NEXT: st r1, [p0, #0]
265+
; AIE2-NEXT: and r2, r2, r3
266266
; AIE2-NEXT: jl #extern_call
267-
; AIE2-NEXT: mov p0, p1 // Delay Slot 5
268-
; AIE2-NEXT: and r2, r2, r3 // Delay Slot 4
269-
; AIE2-NEXT: mov m0, r2 // Delay Slot 3
270-
; AIE2-NEXT: paddb [p1], m0 // Delay Slot 2
267+
; AIE2-NEXT: mov m0, r2 // Delay Slot 5
268+
; AIE2-NEXT: paddb [p1], m0 // Delay Slot 4
269+
; AIE2-NEXT: movxm m0, #-40032 // Delay Slot 3
270+
; AIE2-NEXT: paddb [p6], m0 // Delay Slot 2
271271
; AIE2-NEXT: mov sp, p1 // Delay Slot 1
272272
; AIE2-NEXT: nopb ; nopa ; nops ; jl #extern_call; nopv
273273
; AIE2-NEXT: nopa ; nopx // Delay Slot 5
274274
; AIE2-NEXT: nop // Delay Slot 4
275275
; AIE2-NEXT: nop // Delay Slot 3
276276
; AIE2-NEXT: nop // Delay Slot 2
277-
; AIE2-NEXT: mov p0, r16 // Delay Slot 1
278-
; AIE2-NEXT: lda p0, [p6, #0]; nopx
277+
; AIE2-NEXT: mov p0, p6 // Delay Slot 1
278+
; AIE2-NEXT: nopb ; nopa ; nops ; nopx ; mov p0, r16; nopv
279+
; AIE2-NEXT: lda p0, [p0, #0]; nopx
279280
; AIE2-NEXT: nop
280281
; AIE2-NEXT: nop
281282
; AIE2-NEXT: nop
@@ -329,18 +330,18 @@ define void @test_huge_stack(i32 noundef %n) #0 {
329330
; AIE2P-NEXT: padda [p0], m0
330331
; AIE2P-NEXT: mova m0, #-32
331332
; AIE2P-NEXT: padda [p3], m0
333+
; AIE2P-NEXT: mova m0, #-24
332334
; AIE2P-NEXT: st r0, [p0, #0]
333335
; AIE2P-NEXT: lda r0, [p0, #0]
334-
; AIE2P-NEXT: mova m0, #-24
335336
; AIE2P-NEXT: mov p0, sp
336337
; AIE2P-NEXT: mov r8, p3
337338
; AIE2P-NEXT: padda [p2], m0
338339
; AIE2P-NEXT: st p0, [p3, #0]
339340
; AIE2P-NEXT: mov p0, p1
341+
; AIE2P-NEXT: st r1, [p2, #4]
340342
; AIE2P-NEXT: lshl r2, r0, r2
341-
; AIE2P-NEXT: st r0, [p2], #4
343+
; AIE2P-NEXT: st r0, [p2, #0]
342344
; AIE2P-NEXT: add r2, r2, #63
343-
; AIE2P-NEXT: st r1, [p2, #0]
344345
; AIE2P-NEXT: and r2, r2, r3
345346
; AIE2P-NEXT: jl #extern_call
346347
; AIE2P-NEXT: mov m0, r2 // Delay Slot 5

llvm/test/CodeGen/AIE/GlobalISel/postinc-with-clustering.mir

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@ body: |
1919
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
2020
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1
2121
; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 4
22-
; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (s32))
22+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
2323
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 8
24-
; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](s32), [[COPY1]], [[C1]](s20) :: (store (s32))
25-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 8
26-
; CHECK-NEXT: [[AIE_POSTINC_LOAD2:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD3:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[AIE_POSTINC_LOAD1]], [[C2]](s20) :: (load (s32))
24+
; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[LOAD]](s32), [[COPY1]], [[C1]](s20) :: (store (s32))
25+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 12
26+
; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (s32))
27+
; CHECK-NEXT: G_STORE [[AIE_OFFSET_LOAD]](s32), [[AIE_POSTINC_STORE]](p0) :: (store (s32))
2728
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 8
28-
; CHECK-NEXT: [[AIE_POSTINC_STORE1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD2]](s32), [[AIE_POSTINC_STORE]], [[C3]](s20) :: (store (s32))
29-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[AIE_POSTINC_LOAD3]](p0) :: (load (s32))
30-
; CHECK-NEXT: G_STORE [[LOAD]](s32), [[AIE_POSTINC_STORE1]](p0) :: (store (s32))
29+
; CHECK-NEXT: [[AIE_OFFSET_LOAD1:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C2]](s20) :: (load (s32))
30+
; CHECK-NEXT: G_AIE_OFFSET_STORE [[AIE_OFFSET_LOAD1]](s32), [[AIE_POSTINC_STORE]](p0), [[C3]](s20) :: (store (s32))
3131
%0:_(p0) = COPY $p0
3232
%1:_(p0) = COPY $p1
3333
%2:_(s20) = G_CONSTANT i20 4
@@ -59,15 +59,15 @@ body: |
5959
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
6060
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1
6161
; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 4
62-
; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (s32))
62+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
6363
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 8
64-
; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](s32), [[COPY1]], [[C1]](s20) :: (store (s32))
65-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 8
66-
; CHECK-NEXT: [[AIE_POSTINC_LOAD2:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD3:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[AIE_POSTINC_LOAD1]], [[C2]](s20) :: (load (s32))
64+
; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[LOAD]](s32), [[COPY1]], [[C1]](s20) :: (store (s32))
65+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 12
66+
; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (s32))
67+
; CHECK-NEXT: G_STORE [[AIE_OFFSET_LOAD]](s32), [[AIE_POSTINC_STORE]](p0) :: (store (s32))
6768
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 8
68-
; CHECK-NEXT: [[AIE_POSTINC_STORE1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD2]](s32), [[AIE_POSTINC_STORE]], [[C3]](s20) :: (store (s32))
69-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[AIE_POSTINC_LOAD3]](p0) :: (load (s32))
70-
; CHECK-NEXT: G_STORE [[LOAD]](s32), [[AIE_POSTINC_STORE1]](p0) :: (store (s32))
69+
; CHECK-NEXT: [[AIE_OFFSET_LOAD1:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C2]](s20) :: (load (s32))
70+
; CHECK-NEXT: G_AIE_OFFSET_STORE [[AIE_OFFSET_LOAD1]](s32), [[AIE_POSTINC_STORE]](p0), [[C3]](s20) :: (store (s32))
7171
%0:_(p0) = COPY $p0
7272
%1:_(p0) = COPY $p1
7373
%2:_(s20) = G_CONSTANT i20 4
@@ -119,15 +119,15 @@ body: |
119119
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
120120
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1
121121
; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 4
122-
; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (s32))
122+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
123123
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 8
124-
; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](s32), [[COPY1]], [[C1]](s20) :: (store (s32))
125-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 8
126-
; CHECK-NEXT: [[AIE_POSTINC_LOAD2:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD3:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[AIE_POSTINC_LOAD1]], [[C2]](s20) :: (load (s32))
124+
; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[LOAD]](s32), [[COPY1]], [[C1]](s20) :: (store (s32))
125+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 12
126+
; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (s32))
127+
; CHECK-NEXT: G_STORE [[AIE_OFFSET_LOAD]](s32), [[AIE_POSTINC_STORE]](p0) :: (store (s32))
127128
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 8
128-
; CHECK-NEXT: [[AIE_POSTINC_STORE1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD2]](s32), [[AIE_POSTINC_STORE]], [[C3]](s20) :: (store (s32))
129-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[AIE_POSTINC_LOAD3]](p0) :: (load (s32))
130-
; CHECK-NEXT: G_STORE [[LOAD]](s32), [[AIE_POSTINC_STORE1]](p0) :: (store (s32))
129+
; CHECK-NEXT: [[AIE_OFFSET_LOAD1:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C2]](s20) :: (load (s32))
130+
; CHECK-NEXT: G_AIE_OFFSET_STORE [[AIE_OFFSET_LOAD1]](s32), [[AIE_POSTINC_STORE]](p0), [[C3]](s20) :: (store (s32))
131131
%0:_(p0) = COPY $p0
132132
%1:_(p0) = COPY $p1
133133
%2:_(s20) = G_CONSTANT i20 4

llvm/test/CodeGen/AIE/aie2/GlobalISel/prologepilog-tail-call-opt.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ define dso_local void @_Z5test2PPv(ptr nocapture readonly %args) local_unnamed_a
4040
; CHECK: bb.0.entry:
4141
; CHECK-NEXT: liveins: $p0
4242
; CHECK-NEXT: {{ $}}
43-
; CHECK-NEXT: renamable $p1, renamable $p0 = LDA_dms_lda_pstm_nrm_imm killed renamable $p0, 4 :: (load (p0) from %ir.args, align 4)
44-
; CHECK-NEXT: renamable $p0 = LDA_dms_lda_idx_imm killed renamable $p0, 0 :: (load (p0) from %ir.arrayidx1, align 4)
43+
; CHECK-NEXT: renamable $p1 = LDA_dms_lda_idx_imm renamable $p0, 0 :: (load (p0) from %ir.args, align 4)
44+
; CHECK-NEXT: renamable $p0 = LDA_dms_lda_idx_imm killed renamable $p0, 4 :: (load (p0) from %ir.arrayidx1, align 4)
4545
; CHECK-NEXT: renamable $r1 = LDA_dms_lda_idx_imm killed renamable $p1, 0 :: (load (s32) from %ir.0)
4646
; CHECK-NEXT: renamable $r2 = LDA_dms_lda_idx_imm killed renamable $p0, 0 :: (load (s32) from %ir.2)
4747
; CHECK-NEXT: PseudoJ_TCO_jump_imm @_Z4funcii, csr_aie2, implicit $r1, implicit $r2

llvm/test/CodeGen/AIE/aie2/conv2d_offset_test.ll

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,29 +20,31 @@ define dso_local noundef i32 @_Z3foov() #0 {
2020
; CHECK-LABEL: _Z3foov:
2121
; CHECK: .p2align 4
2222
; CHECK-NEXT: // %bb.0: // %entry
23-
; CHECK-NEXT: nopb ; nopa ; nops ; movxm p0, #(X+92); nopv
24-
; CHECK-NEXT: mova dj0, #96
25-
; CHECK-NEXT: lda.u8 r1, [p0, dj0]
26-
; CHECK-NEXT: mova dj0, #-68
27-
; CHECK-NEXT: lda r0, [p0, #8]
28-
; CHECK-NEXT: lda r1, [p0, #0]
29-
; CHECK-NEXT: lda.u16 r1, [p0, dj0]
30-
; CHECK-NEXT: mova dj0, #-56
31-
; CHECK-NEXT: lda.u8 r1, [p0, dj0]
23+
; CHECK-NEXT: nop ; movxm p1, #(X+92)
24+
; CHECK-NEXT: mova m0, #-164
25+
; CHECK-NEXT: mov p0, p1
26+
; CHECK-NEXT: paddb [p0], #8
27+
; CHECK-NEXT: lda r0, [p0], #88
28+
; CHECK-NEXT: lda.u8 r1, [p0], m0
29+
; CHECK-NEXT: mova m0, #12
30+
; CHECK-NEXT: lda r1, [p1, #0]
31+
; CHECK-NEXT: lda.u16 r1, [p0], m0
32+
; CHECK-NEXT: mova m0, #60
33+
; CHECK-NEXT: lda.u8 r1, [p0], m0
3234
; CHECK-NEXT: nop
33-
; CHECK-NEXT: lda r1, [p0, #4]
35+
; CHECK-NEXT: lda r1, [p0], #76
3436
; CHECK-NEXT: add r0, r0, r1
3537
; CHECK-NEXT: add r0, r0, r1
3638
; CHECK-NEXT: nop
3739
; CHECK-NEXT: add r0, r0, r1
3840
; CHECK-NEXT: add r0, r0, r1
39-
; CHECK-NEXT: lda r1, [p0, #80]
41+
; CHECK-NEXT: lda r1, [p0], #12
4042
; CHECK-NEXT: add r0, r0, r1
41-
; CHECK-NEXT: lda r1, [p0, #92]
42-
; CHECK-NEXT: lda r1, [p0, #44]
43-
; CHECK-NEXT: lda r1, [p0, #8]
44-
; CHECK-NEXT: lda r1, [p0, #76]
45-
; CHECK-NEXT: lda r1, [p0, #-60]
43+
; CHECK-NEXT: lda r1, [p0], #-48
44+
; CHECK-NEXT: lda r1, [p0], #32
45+
; CHECK-NEXT: lda r1, [p1, #8]
46+
; CHECK-NEXT: lda r1, [p0], #-136
47+
; CHECK-NEXT: lda r1, [p0, #0]
4648
; CHECK-NEXT: add r0, r0, r1
4749
; CHECK-NEXT: ret lr
4850
; CHECK-NEXT: add r0, r0, r1 // Delay Slot 5

0 commit comments

Comments
 (0)