Skip to content

Commit a61c867

Browse files
Martien de Jong (martien-de-jong)
authored and committed
[AIE] PostPipeliner's Latest wasn't updated correctly
1 parent c12ef07 commit a61c867

24 files changed

+576
-758
lines changed

llvm/lib/Target/AIE/AIEPostPipeliner.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,12 @@ void PostPipeliner::scheduleNode(SUnit &SU, int Cycle,
156156
}
157157
const int SNum = Succ->NodeNum;
158158
const int NewEarliest = Cycle + Latency;
159-
if (NewEarliest > Info[SNum].Earliest) {
159+
if (NewEarliest > Strategy.earliest(*Succ)) {
160160
LLVM_DEBUG(dbgs() << "SU" << SNum << " from " << Info[SNum].Earliest
161161
<< " to " << NewEarliest << " ; ");
162162
Info[SNum].LastEarliestPusher = SU.NodeNum;
163-
Info[SNum].Earliest = NewEarliest;
164163
Info[SU.NodeNum].NumPushedEarliest++;
164+
Strategy.setEarliest(SNum, NewEarliest);
165165
Strategy.setChanged();
166166
}
167167
}
@@ -174,12 +174,12 @@ void PostPipeliner::scheduleNode(SUnit &SU, int Cycle,
174174
}
175175
const int PNum = Pred->NodeNum;
176176
const int NewLatest = Cycle - Latency;
177-
if (NewLatest < Info[PNum].Latest) {
177+
if (NewLatest < Strategy.latest(*Pred)) {
178178
LLVM_DEBUG(dbgs() << "SU" << PNum << " from " << Info[PNum].Latest
179179
<< " to " << NewLatest << " ; ");
180180
Info[PNum].LastLatestPusher = SU.NodeNum;
181-
Info[PNum].Latest = NewLatest;
182181
Info[SU.NodeNum].NumPushedLatest++;
182+
Strategy.setLatest(PNum, NewLatest);
183183
Strategy.setChanged();
184184
}
185185
}

llvm/lib/Target/AIE/AIEPostPipeliner.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,11 @@ class PostPipelinerStrategy {
120120
bool Changed = false;
121121

122122
public:
123+
// Overwrite the Earliest field of the Info entry at Index with Value.
void setEarliest(int Index, int Value) { Info[Index].Earliest = Value; }
124+
// Overwrite the Latest field of the Info entry at Index. The stored value is
// Value with LatestBias subtracted, so callers pass the un-biased number.
void setLatest(int Index, int Value) {
125+
Info[Index].Latest = Value - LatestBias;
126+
}
127+
123128
// Register a change
124129
void setChanged() { Changed = true; }
125130
// Return changed and reset it

llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/bitwisenot.mir

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,15 @@
2020
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; nops ; nopxm ; nopv
2121
; CHECK-NEXT: vldb wl0, [p0], #64; nopa ; nops ; nopxm ; nopv
2222
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; nops ; nopxm ; nopv
23-
; CHECK-NEXT: vldb wl0, [p0], #64; nopa ; nops ; nopxm ; nopv
24-
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; nops ; nopx ; vbneg_ltz.s16 x1, r21, x0; nopv
25-
; CHECK-NEXT: vldb wl0, [p0], #64; nopa ; nops ; nopxm ; nopv
23+
; CHECK-NEXT: vldb wl0, [p0], #64; nopa ; nops ; nopx ; vbneg_ltz.s16 x1, r21, x0; nopv
24+
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; nops ; nopxm ; nopv
25+
; CHECK-NEXT: vldb wl0, [p0], #64; nopa ; vst wh1, [p1, #32]; nopx ; vbneg_ltz.s16 x1, r21, x0; nopv
2626
; CHECK-NEXT: .LBB0_1: // %for.body
2727
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
28-
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; vst wh1, [p1, #32]; nopx ; vbneg_ltz.s16 x1, r21, x0; nopv
28+
; CHECK-NEXT: vldb wh0, [p0, #32]; nopa ; vst wl1, [p1], #64; nopxm ; nopv
2929
; CHECK-NEXT: .L_LEnd0:
30-
; CHECK-NEXT: vldb wl0, [p0], #64; nopa ; vst wl1, [p1], #64; nopxm ; nopv
30+
; CHECK-NEXT: vldb wl0, [p0], #64; nopa ; vst wh1, [p1, #32]; nopx ; vbneg_ltz.s16 x1, r21, x0; nopv
3131
; CHECK-NEXT: .LBB0_2: // %for.cond.cleanup
32-
; CHECK-NEXT: nopb ; nopa ; vst wh1, [p1, #32]; nopx ; vbneg_ltz.s16 x1, r21, x0; nopv
3332
; CHECK-NEXT: vst wl1, [p1], #64; nopx
3433
; CHECK-NEXT: vst wh1, [p1, #32]; vbneg_ltz.s16 x1, r21, x0
3534
; CHECK-NEXT: vst wl1, [p1], #64

llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/bitwisexor.mir

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,40 +26,33 @@
2626
; CHECK-NEXT: nop // Delay Slot 2
2727
; CHECK-NEXT: nop // Delay Slot 1
2828
; CHECK-NEXT: // %bb.1: // %for.body.preheader
29-
; CHECK-NEXT: vlda wh1, [p0, #32]; vldb wh0, [p1, #32]; nopx
29+
; CHECK-NEXT: vlda wh1, [p0, #32]; vldb wh0, [p1, #32]; nopm
3030
; CHECK-NEXT: vlda wl1, [p0], #64; vldb wl0, [p1], #64
3131
; CHECK-NEXT: nop
3232
; CHECK-NEXT: nop
33-
; CHECK-NEXT: nop
34-
; CHECK-NEXT: vlda wh1, [p0, #32]; vldb wh0, [p1, #32]; add.nc lc, r0, #-3
35-
; CHECK-NEXT: vlda wl1, [p0], #64; vldb wl0, [p1], #64; movxm ls, #.LBB0_2
36-
; CHECK-NEXT: movxm le, #.L_LEnd0
33+
; CHECK-NEXT: add.nc lc, r0, #-2
34+
; CHECK-NEXT: vlda wh1, [p0, #32]; vldb wh0, [p1, #32]; movxm ls, #.LBB0_2
35+
; CHECK-NEXT: vlda wl1, [p0], #64; vldb wl0, [p1], #64; movxm le, #.L_LEnd0
3736
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vbneg_ltz.s8 x2, r25:r24, x0; nopv
3837
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vbneg_ltz.s8 x3, r25:r24, x1; nopv
39-
; CHECK-NEXT: vldb wh0, [p1, #32]; vlda wh1, [p0, #32]; nops ; nopx ; vband x4, x0, x3; nopv
40-
; CHECK-NEXT: vldb wl0, [p1], #64; vlda wl1, [p0], #64; nops ; nopx ; vband x5, x1, x2; nopv
41-
; CHECK-NEXT: vbor x6, x4, x5
42-
; CHECK-NEXT: vbneg_ltz.s8 x2, r25:r24, x0
43-
; CHECK-NEXT: vst wh6, [p2, #32]; vbneg_ltz.s8 x3, r25:r24, x1
38+
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vband x4, x0, x3; nopv
4439
; CHECK-NEXT: .p2align 4
4540
; CHECK-NEXT: .LBB0_2: // %for.body
4641
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
47-
; CHECK-NEXT: vldb wh0, [p1, #32]; vlda wh1, [p0, #32]; vst wl6, [p2], #64; nopx ; vband x4, x0, x3; nopv
48-
; CHECK-NEXT: vldb wl0, [p1], #64; vlda wl1, [p0], #64; nops ; nopx ; vband x5, x1, x2; nopv
49-
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vbor x6, x4, x5; nopv
50-
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vbneg_ltz.s8 x2, r25:r24, x0; nopv
42+
; CHECK-NEXT: vldb wh0, [p1, #32]; vlda wh1, [p0, #32]; nops ; nopx ; vband x5, x1, x2; nopv
43+
; CHECK-NEXT: vldb wl0, [p1], #64; vlda wl1, [p0], #64; nops ; nopx ; vbor x6, x4, x5; nopv
44+
; CHECK-NEXT: nopb ; nopa ; vst wh6, [p2, #32]; nopx ; vbneg_ltz.s8 x2, r25:r24, x0; nopv
45+
; CHECK-NEXT: nopb ; nopa ; vst wl6, [p2], #64; nopx ; vbneg_ltz.s8 x3, r25:r24, x1; nopv
5146
; CHECK-NEXT: .L_LEnd0:
52-
; CHECK-NEXT: nopb ; nopa ; vst wh6, [p2, #32]; nopx ; vbneg_ltz.s8 x3, r25:r24, x1; nopv
47+
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vband x4, x0, x3; nopv
5348
; CHECK-NEXT: // %bb.3: // %for.cond.cleanup
54-
; CHECK-NEXT: nopb ; nopa ; vst wl6, [p2], #64; nopx ; vband x4, x0, x3; nopv
55-
; CHECK-NEXT: vband x5, x1, x2
49+
; CHECK-NEXT: nopa ; vband x5, x1, x2
5650
; CHECK-NEXT: vbor x6, x4, x5
57-
; CHECK-NEXT: vbneg_ltz.s8 x2, r25:r24, x0
58-
; CHECK-NEXT: vst wh6, [p2, #32]; vbneg_ltz.s8 x3, r25:r24, x1
59-
; CHECK-NEXT: vst wl6, [p2], #64; vband x4, x0, x3
51+
; CHECK-NEXT: vst wh6, [p2, #32]; vbneg_ltz.s8 x2, r25:r24, x0
52+
; CHECK-NEXT: vst wl6, [p2], #64; vbneg_ltz.s8 x3, r25:r24, x1
53+
; CHECK-NEXT: vband x4, x0, x3
6054
; CHECK-NEXT: vband x5, x1, x2
6155
; CHECK-NEXT: vbor x6, x4, x5
62-
; CHECK-NEXT: nop
6356
; CHECK-NEXT: vst wh6, [p2, #32]
6457
; CHECK-NEXT: vst wl6, [p2], #64
6558
; CHECK-NEXT: nop

llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16-1.mir

Lines changed: 36 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -25,68 +25,54 @@
2525
; CHECK-NEXT: nop // Delay Slot 2
2626
; CHECK-NEXT: nop // Delay Slot 1
2727
; CHECK-NEXT: // %bb.1: // %for.body.preheader
28-
; CHECK-NEXT: vlda wh9, [p4, #416]; nopxm
29-
; CHECK-NEXT: vlda wh7, [p4, #352]
30-
; CHECK-NEXT: vlda wl7, [p4, #320]
31-
; CHECK-NEXT: vlda wl9, [p4, #384]
32-
; CHECK-NEXT: vlda wh11, [p4, #480]
33-
; CHECK-NEXT: vlda wl11, [p4, #448]; mov p7, p5
34-
; CHECK-NEXT: vldb wh8, [p0, #32]; mov p4, p7
28+
; CHECK-NEXT: vldb wh8, [p0, #32]; nopx ; mov p7, p5
3529
; CHECK-NEXT: vldb wl8, [p0], m4
3630
; CHECK-NEXT: vldb wh10, [p0, #32]
37-
; CHECK-NEXT: vldb wl10, [p0], m4
38-
; CHECK-NEXT: vldb wh1, [p0, #32]
39-
; CHECK-NEXT: vldb wl1, [p0], m4
40-
; CHECK-NEXT: vldb wh3, [p0, #32]; add.nc lc, r0, #-1
41-
; CHECK-NEXT: vldb.3d wl3, [p0], d1; movxm ls, #.LBB0_2
42-
; CHECK-NEXT: vshift.align x0, x0, s0, x8, r3
43-
; CHECK-NEXT: movxm le, #.L_LEnd0
44-
; CHECK-NEXT: vshift.align x2, x2, s0, x10, r3
45-
; CHECK-NEXT: vshuffle x5, x0, x2, r25
46-
; CHECK-NEXT: vldb wh5, [p5, #32]; vshuffle x8, x0, x2, r9
31+
; CHECK-NEXT: vldb wl10, [p0], m4; mov p4, p7
32+
; CHECK-NEXT: vldb wh1, [p0, #32]; add.nc lc, r0, #-1
33+
; CHECK-NEXT: vldb wl1, [p0], m4; movxm ls, #.LBB0_2
34+
; CHECK-NEXT: vldb wh3, [p0, #32]; movxm le, #.L_LEnd0
35+
; CHECK-NEXT: vldb.3d wl3, [p0], d1; vshift.align x0, x0, s0, x8, r3
36+
; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x2, x2, s0, x10, r3
4737
; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3
48-
; CHECK-NEXT: vshift.align x6, x6, s0, x3, r3; vmac.f bmh1, bmh1, x8, x9, r29
38+
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x6, x6, s0, x3, r3
39+
; CHECK-NEXT: vlda wl7, [p4, #320]; and r3, r3, r0; vshuffle x8, x0, x2, r9; vmac.f bmh1, bmh1, x8, x9, r29
40+
; CHECK-NEXT: vlda wh9, [p4, #416]; add r3, r3, #34; vshuffle x3, x4, x6, r9; vmac.f bml4, bml4, x8, x7, r29
41+
; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x5, x0, x2, r25; vmac.f bmh7, bmh7, x8, x5, r29
42+
; CHECK-NEXT: vlda wh11, [p4, #480]; vshuffle x10, x4, x6, r25; vmac.f bmh5, bmh5, x1, x5, r29
43+
; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x1, x3, x5, r13; vmac.f bml2, bml2, x3, x5, r29
4944
; CHECK-NEXT: .p2align 4
5045
; CHECK-NEXT: .LBB0_2: // %for.body
5146
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
52-
; CHECK-NEXT: vlda wh9, [p4, #416]; vshuffle x10, x4, x6, r25; vmac.f bml4, bml4, x8, x7, r29
53-
; CHECK-NEXT: vlda wh7, [p4, #352]; vshuffle x3, x4, x6, r9; vmac.f bmh6, bmh6, x8, x11, r29
54-
; CHECK-NEXT: vlda wl7, [p4, #320]; vshuffle x1, x3, x5, r13; vmac.f bmh2, bmh2, x10, x9, r29
55-
; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x3, x3, x5, r24; vmac.f bml5, bml5, x10, x7, r29
56-
; CHECK-NEXT: vlda wh11, [p4, #480]; mov r3, p0; vmac.f bmh0, bmh0, x1, x9, r29
57-
; CHECK-NEXT: vlda wl11, [p4, #448]; and r3, r3, r0; mov p7, p5; vmac.f bmh3, bmh3, x3, x9, r29
58-
; CHECK-NEXT: vldb wh8, [p0, #32]; add r3, r3, #34; mov p4, p7; vmac.f bml3, bml3, x1, x7, r29
59-
; CHECK-NEXT: vldb wl8, [p0], m4; vmac.f bml6, bml6, x3, x7, r29
60-
; CHECK-NEXT: vldb wh10, [p0, #32]; vmac.f bmh4, bmh4, x1, x11, r29
61-
; CHECK-NEXT: vldb wl10, [p0], m4; vmac.f bml1, bml1, x3, x11, r29
62-
; CHECK-NEXT: vldb wh1, [p0, #32]; vmac.f bmh8, bmh8, x10, x11, r29
63-
; CHECK-NEXT: vldb wl1, [p0], m4; vmac.f bmh7, bmh7, x8, x5, r29
64-
; CHECK-NEXT: vldb wh3, [p0, #32]; vmac.f bmh5, bmh5, x1, x5, r29
65-
; CHECK-NEXT: vldb.3d wl3, [p0], d1; vmac.f bml2, bml2, x3, x5, r29
66-
; CHECK-NEXT: vshift.align x0, x0, s0, x8, r3; vmac.f bml0, bml0, x10, x5, r29
67-
; CHECK-NEXT: nop
68-
; CHECK-NEXT: vshift.align x2, x2, s0, x10, r3
69-
; CHECK-NEXT: vshuffle x5, x0, x2, r25
70-
; CHECK-NEXT: vldb wh5, [p5, #32]; vshuffle x8, x0, x2, r9
71-
; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3
47+
; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p7, p5; vmac.f bml0, bml0, x10, x5, r29
48+
; CHECK-NEXT: vldb wl8, [p0], m4; nopa ; nops ; nopx ; vshuffle x3, x3, x5, r24; vmac.f bmh0, bmh0, x1, x9, r29
49+
; CHECK-NEXT: vldb wh10, [p0, #32]; mov r3, p0; vmac.f bmh3, bmh3, x3, x9, r29
50+
; CHECK-NEXT: vldb wl10, [p0], m4; mov p4, p7; vmac.f bmh2, bmh2, x10, x9, r29
51+
; CHECK-NEXT: vldb wh1, [p0, #32]; vmac.f bml3, bml3, x1, x7, r29
52+
; CHECK-NEXT: vldb wl1, [p0], m4; vmac.f bml6, bml6, x3, x7, r29
53+
; CHECK-NEXT: vldb wh3, [p0, #32]; vmac.f bml5, bml5, x10, x7, r29
54+
; CHECK-NEXT: vldb.3d wl3, [p0], d1; vshift.align x0, x0, s0, x8, r3; vmac.f bmh6, bmh6, x8, x11, r29
55+
; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x2, x2, s0, x10, r3; vmac.f bmh4, bmh4, x1, x11, r29
56+
; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3; vmac.f bml1, bml1, x3, x11, r29
57+
; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x6, x6, s0, x3, r3; vmac.f bmh8, bmh8, x10, x11, r29
58+
; CHECK-NEXT: vlda wl7, [p4, #320]; and r3, r3, r0; vshuffle x8, x0, x2, r9; vmac.f bmh1, bmh1, x8, x9, r29
59+
; CHECK-NEXT: vlda wh9, [p4, #416]; add r3, r3, #34; vshuffle x3, x4, x6, r9; vmac.f bml4, bml4, x8, x7, r29
60+
; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x5, x0, x2, r25; vmac.f bmh7, bmh7, x8, x5, r29
61+
; CHECK-NEXT: vlda wh11, [p4, #480]; vshuffle x10, x4, x6, r25; vmac.f bmh5, bmh5, x1, x5, r29
7262
; CHECK-NEXT: .L_LEnd0:
73-
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshift.align x6, x6, s0, x3, r3; vmac.f bmh1, bmh1, x8, x9, r29
63+
; CHECK-NEXT: nopb ; vlda wl11, [p4, #448]; nops ; nopx ; vshuffle x1, x3, x5, r13; vmac.f bml2, bml2, x3, x5, r29
7464
; CHECK-NEXT: // %bb.3: // %for.cond.cleanup
75-
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshuffle x10, x4, x6, r25; vmac.f bml4, bml4, x8, x7, r29
76-
; CHECK-NEXT: nopa ; nopx ; vshuffle x3, x4, x6, r9; vmac.f bmh6, bmh6, x8, x11, r29
77-
; CHECK-NEXT: vshuffle x1, x3, x5, r13; vmac.f bmh2, bmh2, x10, x9, r29
78-
; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bml5, bml5, x10, x7, r29
79-
; CHECK-NEXT: mov r3, p0; vmac.f bmh0, bmh0, x1, x9, r29
80-
; CHECK-NEXT: and r3, r3, r0; vmac.f bmh3, bmh3, x3, x9, r29
81-
; CHECK-NEXT: add r3, r3, #34; vmac.f bml3, bml3, x1, x7, r29
65+
; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29
66+
; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh0, bmh0, x1, x9, r29
67+
; CHECK-NEXT: mov r3, p0; vmac.f bmh3, bmh3, x3, x9, r29
68+
; CHECK-NEXT: vmac.f bmh2, bmh2, x10, x9, r29
69+
; CHECK-NEXT: vmac.f bml3, bml3, x1, x7, r29
8270
; CHECK-NEXT: vmac.f bml6, bml6, x3, x7, r29
71+
; CHECK-NEXT: vmac.f bml5, bml5, x10, x7, r29
72+
; CHECK-NEXT: vmac.f bmh6, bmh6, x8, x11, r29
8373
; CHECK-NEXT: vmac.f bmh4, bmh4, x1, x11, r29
8474
; CHECK-NEXT: vmac.f bml1, bml1, x3, x11, r29
8575
; CHECK-NEXT: vmac.f bmh8, bmh8, x10, x11, r29
86-
; CHECK-NEXT: vmac.f bmh7, bmh7, x8, x5, r29
87-
; CHECK-NEXT: vmac.f bmh5, bmh5, x1, x5, r29
88-
; CHECK-NEXT: vmac.f bml2, bml2, x3, x5, r29
89-
; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29
9076
; CHECK-NEXT: nop
9177
; CHECK-NEXT: nop
9278
; CHECK-NEXT: nop

llvm/test/CodeGen/AIE/aie2/schedule/postpipeliner/conv2d_bf16-2.mir

Lines changed: 35 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -26,69 +26,57 @@
2626
; CHECK-NEXT: nop // Delay Slot 2
2727
; CHECK-NEXT: nop // Delay Slot 1
2828
; CHECK-NEXT: // %bb.1: // %for.body.preheader
29-
; CHECK-NEXT: nopa ; vldb wh7, [p5, #32]; nopxm
30-
; CHECK-NEXT: vldb wl7, [p5], #64
31-
; CHECK-NEXT: vldb wh8, [p4, #32]
29+
; CHECK-NEXT: vldb wh8, [p4, #32]; nopxm
3230
; CHECK-NEXT: vldb wl8, [p4], m4
33-
; CHECK-NEXT: nop
3431
; CHECK-NEXT: vldb wh10, [p4, #32]
35-
; CHECK-NEXT: vldb wl10, [p4], m4
36-
; CHECK-NEXT: vldb wh1, [p4, #32]
37-
; CHECK-NEXT: vldb wl1, [p4], m4; movxm ls, #.LBB0_2
32+
; CHECK-NEXT: vldb wl10, [p4], m4; movxm ls, #.LBB0_2
33+
; CHECK-NEXT: vldb wh1, [p4, #32]; mov r3, p4
34+
; CHECK-NEXT: vldb wl1, [p4], m4; and r3, r3, r6; add.nc lc, r0, #-1
3835
; CHECK-NEXT: vldb wh3, [p4, #32]; movxm le, #.L_LEnd0
3936
; CHECK-NEXT: vldb.3d wl3, [p4], d0; vshift.align x0, x0, s0, x8, r21
40-
; CHECK-NEXT: mov r3, p4
41-
; CHECK-NEXT: and r3, r3, r6; add.nc lc, r0, #-1
42-
; CHECK-NEXT: vshift.align x2, x2, s0, x10, r21
43-
; CHECK-NEXT: vshuffle x5, x0, x2, r25
44-
; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x4, x4, s0, x1, r21
45-
; CHECK-NEXT: vldb wl5, [p5], #64; vshuffle x8, x0, x2, r10
46-
; CHECK-NEXT: vldb wh9, [p5, #32]; add r21, r3, #34; vshift.align x6, x6, s0, x3, r21
47-
; CHECK-NEXT: vldb wl9, [p5], #64; vshuffle x3, x4, x6, r10; vmac.f bmh4, bmh4, x8, x7, r14
48-
; CHECK-NEXT: vldb wh11, [p5, #32]; vshuffle x10, x4, x6, r25
49-
; CHECK-NEXT: vldb wl11, [p5], #64; vshuffle x1, x3, x5, r15
37+
; CHECK-NEXT: vldb wh7, [p5, #32]; vshift.align x2, x2, s0, x10, r21
38+
; CHECK-NEXT: vldb wl7, [p5], #64; vshift.align x4, x4, s0, x1, r21
39+
; CHECK-NEXT: vldb wh5, [p5, #32]; add r21, r3, #34; vshift.align x6, x6, s0, x3, r21
40+
; CHECK-NEXT: vldb wl5, [p5], #64; vshuffle x8, x0, x2, r10; vmac.f bmh4, bmh4, x8, x7, r14
41+
; CHECK-NEXT: vldb wh9, [p5, #32]; vshuffle x3, x4, x6, r10
42+
; CHECK-NEXT: vldb wl9, [p5], #64; vshuffle x5, x0, x2, r25; vmac.f bml0, bml0, x10, x7, r14
43+
; CHECK-NEXT: vldb wh11, [p5, #32]; vshuffle x10, x4, x6, r25; vmac.f bmh3, bmh3, x1, x7, r14
44+
; CHECK-NEXT: vldb wl11, [p5], #64; vshuffle x1, x3, x5, r15; vmac.f bml1, bml1, x3, x7, r14
5045
; CHECK-NEXT: .p2align 4
5146
; CHECK-NEXT: .LBB0_2: // %for.body
5247
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
53-
; CHECK-NEXT: vldb wh7, [p5, #32]; nopx ; vshuffle x3, x3, x5, r24; vmac.f bml0, bml0, x10, x7, r14
54-
; CHECK-NEXT: vldb wl7, [p5], #64; vmac.f bmh3, bmh3, x1, x7, r14
55-
; CHECK-NEXT: vldb wh8, [p4, #32]; vmac.f bml5, bml5, x10, x5, r14
56-
; CHECK-NEXT: vldb wl8, [p4], m4; vmac.f bml2, bml2, x8, x5, r14
57-
; CHECK-NEXT: vmac.f bml1, bml1, x3, x7, r14
58-
; CHECK-NEXT: vldb wh10, [p4, #32]; vmac.f bmh2, bmh2, x8, x9, r14
59-
; CHECK-NEXT: vldb wl10, [p4], m4; vmac.f bml3, bml3, x10, x11, r14
60-
; CHECK-NEXT: vldb wh1, [p4, #32]; vmac.f bmh7, bmh7, x8, x11, r14
61-
; CHECK-NEXT: vldb wl1, [p4], m4; vmac.f bmh1, bmh1, x1, x9, r14
62-
; CHECK-NEXT: vldb wh3, [p4, #32]; vmac.f bmh6, bmh6, x3, x9, r14
63-
; CHECK-NEXT: vldb.3d wl3, [p4], d0; vshift.align x0, x0, s0, x8, r21; vmac.f bmh5, bmh5, x10, x9, r14
64-
; CHECK-NEXT: mov r3, p4; vmac.f bmh8, bmh8, x1, x5, r14
65-
; CHECK-NEXT: and r3, r3, r6; vmac.f bml6, bml6, x3, x5, r14
66-
; CHECK-NEXT: vshift.align x2, x2, s0, x10, r21; vmac.f bmh0, bmh0, x1, x11, r14
67-
; CHECK-NEXT: vshuffle x5, x0, x2, r25; vmac.f bml4, bml4, x3, x11, r14
68-
; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x4, x4, s0, x1, r21
69-
; CHECK-NEXT: vldb wl5, [p5], #64; vshuffle x8, x0, x2, r10
70-
; CHECK-NEXT: vldb wh9, [p5, #32]; add r21, r3, #34; vshift.align x6, x6, s0, x3, r21
71-
; CHECK-NEXT: vldb wl9, [p5], #64; vshuffle x3, x4, x6, r10; vmac.f bmh4, bmh4, x8, x7, r14
72-
; CHECK-NEXT: vldb wh11, [p5, #32]; vshuffle x10, x4, x6, r25
48+
; CHECK-NEXT: vldb wh8, [p4, #32]; vshuffle x3, x3, x5, r24; vmac.f bmh2, bmh2, x8, x9, r14
49+
; CHECK-NEXT: vldb wl8, [p4], m4; vmac.f bmh1, bmh1, x1, x9, r14
50+
; CHECK-NEXT: vldb wh10, [p4, #32]; vmac.f bmh6, bmh6, x3, x9, r14
51+
; CHECK-NEXT: vldb wl10, [p4], m4; vmac.f bmh5, bmh5, x10, x9, r14
52+
; CHECK-NEXT: vldb wh1, [p4, #32]; mov r3, p4; vmac.f bml2, bml2, x8, x5, r14
53+
; CHECK-NEXT: vldb wl1, [p4], m4; and r3, r3, r6; vmac.f bmh8, bmh8, x1, x5, r14
54+
; CHECK-NEXT: vldb wh3, [p4, #32]; vmac.f bml6, bml6, x3, x5, r14
55+
; CHECK-NEXT: vldb.3d wl3, [p4], d0; vshift.align x0, x0, s0, x8, r21; vmac.f bml5, bml5, x10, x5, r14
56+
; CHECK-NEXT: vldb wh7, [p5, #32]; vshift.align x2, x2, s0, x10, r21; vmac.f bmh7, bmh7, x8, x11, r14
57+
; CHECK-NEXT: vldb wl7, [p5], #64; vshift.align x4, x4, s0, x1, r21; vmac.f bmh0, bmh0, x1, x11, r14
58+
; CHECK-NEXT: vldb wh5, [p5, #32]; add r21, r3, #34; vshift.align x6, x6, s0, x3, r21; vmac.f bml4, bml4, x3, x11, r14
59+
; CHECK-NEXT: vldb wl5, [p5], #64; vshuffle x8, x0, x2, r10; vmac.f bmh4, bmh4, x8, x7, r14
60+
; CHECK-NEXT: vldb wh9, [p5, #32]; vshuffle x3, x4, x6, r10; vmac.f bml3, bml3, x10, x11, r14
61+
; CHECK-NEXT: vldb wl9, [p5], #64; vshuffle x5, x0, x2, r25; vmac.f bml0, bml0, x10, x7, r14
62+
; CHECK-NEXT: vldb wh11, [p5, #32]; vshuffle x10, x4, x6, r25; vmac.f bmh3, bmh3, x1, x7, r14
7363
; CHECK-NEXT: .L_LEnd0:
74-
; CHECK-NEXT: vldb wl11, [p5], #64; nopa ; nops ; nopx ; vshuffle x1, x3, x5, r15; nopv
64+
; CHECK-NEXT: vldb wl11, [p5], #64; nopa ; nops ; nopx ; vshuffle x1, x3, x5, r15; vmac.f bml1, bml1, x3, x7, r14
7565
; CHECK-NEXT: // %bb.3: // %for.cond.cleanup
76-
; CHECK-NEXT: nopa ; nopx ; vshuffle x3, x3, x5, r24; vmac.f bml0, bml0, x10, x7, r14
77-
; CHECK-NEXT: vmac.f bmh3, bmh3, x1, x7, r14
78-
; CHECK-NEXT: vmac.f bml5, bml5, x10, x5, r14
79-
; CHECK-NEXT: vmac.f bml2, bml2, x8, x5, r14
80-
; CHECK-NEXT: vmac.f bml1, bml1, x3, x7, r14
81-
; CHECK-NEXT: vmac.f bmh2, bmh2, x8, x9, r14
82-
; CHECK-NEXT: vmac.f bml3, bml3, x10, x11, r14
83-
; CHECK-NEXT: vmac.f bmh7, bmh7, x8, x11, r14
84-
; CHECK-NEXT: vmac.f bmh1, bmh1, x1, x9, r14
66+
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshuffle x3, x3, x5, r24; vmac.f bmh2, bmh2, x8, x9, r14
67+
; CHECK-NEXT: nopa ; nopx ; vmac.f bmh1, bmh1, x1, x9, r14
8568
; CHECK-NEXT: vmac.f bmh6, bmh6, x3, x9, r14
8669
; CHECK-NEXT: vmac.f bmh5, bmh5, x10, x9, r14
70+
; CHECK-NEXT: vmac.f bml2, bml2, x8, x5, r14
8771
; CHECK-NEXT: vmac.f bmh8, bmh8, x1, x5, r14
8872
; CHECK-NEXT: vmac.f bml6, bml6, x3, x5, r14
73+
; CHECK-NEXT: vmac.f bml5, bml5, x10, x5, r14
74+
; CHECK-NEXT: vmac.f bmh7, bmh7, x8, x11, r14
8975
; CHECK-NEXT: vmac.f bmh0, bmh0, x1, x11, r14
9076
; CHECK-NEXT: vmac.f bml4, bml4, x3, x11, r14
9177
; CHECK-NEXT: nop
78+
; CHECK-NEXT: vmac.f bml3, bml3, x10, x11, r14
79+
; CHECK-NEXT: nop
9280
; CHECK-NEXT: nop
9381
; CHECK-NEXT: nop
9482
; CHECK-NEXT: nop

0 commit comments

Comments
 (0)