|
25 | 25 | ; CHECK-NEXT: nop // Delay Slot 2 |
26 | 26 | ; CHECK-NEXT: nop // Delay Slot 1 |
27 | 27 | ; CHECK-NEXT: // %bb.1: // %for.body.preheader |
28 | | - ; CHECK-NEXT: vlda wh9, [p4, #416]; nopxm |
29 | | - ; CHECK-NEXT: vlda wh7, [p4, #352] |
30 | | - ; CHECK-NEXT: vlda wl7, [p4, #320] |
31 | | - ; CHECK-NEXT: vlda wl9, [p4, #384] |
32 | | - ; CHECK-NEXT: vlda wh11, [p4, #480] |
33 | | - ; CHECK-NEXT: vlda wl11, [p4, #448]; mov p7, p5 |
34 | | - ; CHECK-NEXT: vldb wh8, [p0, #32]; mov p4, p7 |
| 28 | + ; CHECK-NEXT: vldb wh8, [p0, #32]; nopx ; mov p7, p5 |
35 | 29 | ; CHECK-NEXT: vldb wl8, [p0], m4 |
36 | 30 | ; CHECK-NEXT: vldb wh10, [p0, #32] |
37 | | - ; CHECK-NEXT: vldb wl10, [p0], m4 |
38 | | - ; CHECK-NEXT: vldb wh1, [p0, #32] |
39 | | - ; CHECK-NEXT: vldb wl1, [p0], m4 |
40 | | - ; CHECK-NEXT: vldb wh3, [p0, #32]; add.nc lc, r0, #-1 |
41 | | - ; CHECK-NEXT: vldb.3d wl3, [p0], d1; movxm ls, #.LBB0_2 |
42 | | - ; CHECK-NEXT: vshift.align x0, x0, s0, x8, r3 |
43 | | - ; CHECK-NEXT: movxm le, #.L_LEnd0 |
44 | | - ; CHECK-NEXT: vshift.align x2, x2, s0, x10, r3 |
45 | | - ; CHECK-NEXT: vshuffle x5, x0, x2, r25 |
46 | | - ; CHECK-NEXT: vldb wh5, [p5, #32]; vshuffle x8, x0, x2, r9 |
| 31 | + ; CHECK-NEXT: vldb wl10, [p0], m4; mov p4, p7 |
| 32 | + ; CHECK-NEXT: vldb wh1, [p0, #32]; add.nc lc, r0, #-1 |
| 33 | + ; CHECK-NEXT: vldb wl1, [p0], m4; movxm ls, #.LBB0_2 |
| 34 | + ; CHECK-NEXT: vldb wh3, [p0, #32]; movxm le, #.L_LEnd0 |
| 35 | + ; CHECK-NEXT: vldb.3d wl3, [p0], d1; vshift.align x0, x0, s0, x8, r3 |
| 36 | + ; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x2, x2, s0, x10, r3 |
47 | 37 | ; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3 |
48 | | - ; CHECK-NEXT: vshift.align x6, x6, s0, x3, r3; vmac.f bmh1, bmh1, x8, x9, r29 |
| 38 | + ; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x6, x6, s0, x3, r3 |
| 39 | + ; CHECK-NEXT: vlda wl7, [p4, #320]; and r3, r3, r0; vshuffle x8, x0, x2, r9; vmac.f bmh1, bmh1, x8, x9, r29 |
| 40 | + ; CHECK-NEXT: vlda wh9, [p4, #416]; add r3, r3, #34; vshuffle x3, x4, x6, r9; vmac.f bml4, bml4, x8, x7, r29 |
| 41 | + ; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x5, x0, x2, r25; vmac.f bmh7, bmh7, x8, x5, r29 |
| 42 | + ; CHECK-NEXT: vlda wh11, [p4, #480]; vshuffle x10, x4, x6, r25; vmac.f bmh5, bmh5, x1, x5, r29 |
| 43 | + ; CHECK-NEXT: vlda wl11, [p4, #448]; vshuffle x1, x3, x5, r13; vmac.f bml2, bml2, x3, x5, r29 |
49 | 44 | ; CHECK-NEXT: .p2align 4 |
50 | 45 | ; CHECK-NEXT: .LBB0_2: // %for.body |
51 | 46 | ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
52 | | - ; CHECK-NEXT: vlda wh9, [p4, #416]; vshuffle x10, x4, x6, r25; vmac.f bml4, bml4, x8, x7, r29 |
53 | | - ; CHECK-NEXT: vlda wh7, [p4, #352]; vshuffle x3, x4, x6, r9; vmac.f bmh6, bmh6, x8, x11, r29 |
54 | | - ; CHECK-NEXT: vlda wl7, [p4, #320]; vshuffle x1, x3, x5, r13; vmac.f bmh2, bmh2, x10, x9, r29 |
55 | | - ; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x3, x3, x5, r24; vmac.f bml5, bml5, x10, x7, r29 |
56 | | - ; CHECK-NEXT: vlda wh11, [p4, #480]; mov r3, p0; vmac.f bmh0, bmh0, x1, x9, r29 |
57 | | - ; CHECK-NEXT: vlda wl11, [p4, #448]; and r3, r3, r0; mov p7, p5; vmac.f bmh3, bmh3, x3, x9, r29 |
58 | | - ; CHECK-NEXT: vldb wh8, [p0, #32]; add r3, r3, #34; mov p4, p7; vmac.f bml3, bml3, x1, x7, r29 |
59 | | - ; CHECK-NEXT: vldb wl8, [p0], m4; vmac.f bml6, bml6, x3, x7, r29 |
60 | | - ; CHECK-NEXT: vldb wh10, [p0, #32]; vmac.f bmh4, bmh4, x1, x11, r29 |
61 | | - ; CHECK-NEXT: vldb wl10, [p0], m4; vmac.f bml1, bml1, x3, x11, r29 |
62 | | - ; CHECK-NEXT: vldb wh1, [p0, #32]; vmac.f bmh8, bmh8, x10, x11, r29 |
63 | | - ; CHECK-NEXT: vldb wl1, [p0], m4; vmac.f bmh7, bmh7, x8, x5, r29 |
64 | | - ; CHECK-NEXT: vldb wh3, [p0, #32]; vmac.f bmh5, bmh5, x1, x5, r29 |
65 | | - ; CHECK-NEXT: vldb.3d wl3, [p0], d1; vmac.f bml2, bml2, x3, x5, r29 |
66 | | - ; CHECK-NEXT: vshift.align x0, x0, s0, x8, r3; vmac.f bml0, bml0, x10, x5, r29 |
67 | | - ; CHECK-NEXT: nop |
68 | | - ; CHECK-NEXT: vshift.align x2, x2, s0, x10, r3 |
69 | | - ; CHECK-NEXT: vshuffle x5, x0, x2, r25 |
70 | | - ; CHECK-NEXT: vldb wh5, [p5, #32]; vshuffle x8, x0, x2, r9 |
71 | | - ; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3 |
| 47 | + ; CHECK-NEXT: vldb wh8, [p0, #32]; nopa ; nops ; nopx ; mov p7, p5; vmac.f bml0, bml0, x10, x5, r29 |
| 48 | + ; CHECK-NEXT: vldb wl8, [p0], m4; nopa ; nops ; nopx ; vshuffle x3, x3, x5, r24; vmac.f bmh0, bmh0, x1, x9, r29 |
| 49 | + ; CHECK-NEXT: vldb wh10, [p0, #32]; mov r3, p0; vmac.f bmh3, bmh3, x3, x9, r29 |
| 50 | + ; CHECK-NEXT: vldb wl10, [p0], m4; mov p4, p7; vmac.f bmh2, bmh2, x10, x9, r29 |
| 51 | + ; CHECK-NEXT: vldb wh1, [p0, #32]; vmac.f bml3, bml3, x1, x7, r29 |
| 52 | + ; CHECK-NEXT: vldb wl1, [p0], m4; vmac.f bml6, bml6, x3, x7, r29 |
| 53 | + ; CHECK-NEXT: vldb wh3, [p0, #32]; vmac.f bml5, bml5, x10, x7, r29 |
| 54 | + ; CHECK-NEXT: vldb.3d wl3, [p0], d1; vshift.align x0, x0, s0, x8, r3; vmac.f bmh6, bmh6, x8, x11, r29 |
| 55 | + ; CHECK-NEXT: vldb wh5, [p5, #32]; vshift.align x2, x2, s0, x10, r3; vmac.f bmh4, bmh4, x1, x11, r29 |
| 56 | + ; CHECK-NEXT: vlda wl5, [p5], #256; vshift.align x4, x4, s0, x1, r3; vmac.f bml1, bml1, x3, x11, r29 |
| 57 | + ; CHECK-NEXT: vlda wh7, [p4, #352]; vshift.align x6, x6, s0, x3, r3; vmac.f bmh8, bmh8, x10, x11, r29 |
| 58 | + ; CHECK-NEXT: vlda wl7, [p4, #320]; and r3, r3, r0; vshuffle x8, x0, x2, r9; vmac.f bmh1, bmh1, x8, x9, r29 |
| 59 | + ; CHECK-NEXT: vlda wh9, [p4, #416]; add r3, r3, #34; vshuffle x3, x4, x6, r9; vmac.f bml4, bml4, x8, x7, r29 |
| 60 | + ; CHECK-NEXT: vlda wl9, [p4, #384]; vshuffle x5, x0, x2, r25; vmac.f bmh7, bmh7, x8, x5, r29 |
| 61 | + ; CHECK-NEXT: vlda wh11, [p4, #480]; vshuffle x10, x4, x6, r25; vmac.f bmh5, bmh5, x1, x5, r29 |
72 | 62 | ; CHECK-NEXT: .L_LEnd0: |
73 | | - ; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshift.align x6, x6, s0, x3, r3; vmac.f bmh1, bmh1, x8, x9, r29 |
| 63 | + ; CHECK-NEXT: nopb ; vlda wl11, [p4, #448]; nops ; nopx ; vshuffle x1, x3, x5, r13; vmac.f bml2, bml2, x3, x5, r29 |
74 | 64 | ; CHECK-NEXT: // %bb.3: // %for.cond.cleanup |
75 | | - ; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vshuffle x10, x4, x6, r25; vmac.f bml4, bml4, x8, x7, r29 |
76 | | - ; CHECK-NEXT: nopa ; nopx ; vshuffle x3, x4, x6, r9; vmac.f bmh6, bmh6, x8, x11, r29 |
77 | | - ; CHECK-NEXT: vshuffle x1, x3, x5, r13; vmac.f bmh2, bmh2, x10, x9, r29 |
78 | | - ; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bml5, bml5, x10, x7, r29 |
79 | | - ; CHECK-NEXT: mov r3, p0; vmac.f bmh0, bmh0, x1, x9, r29 |
80 | | - ; CHECK-NEXT: and r3, r3, r0; vmac.f bmh3, bmh3, x3, x9, r29 |
81 | | - ; CHECK-NEXT: add r3, r3, #34; vmac.f bml3, bml3, x1, x7, r29 |
| 65 | + ; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29 |
| 66 | + ; CHECK-NEXT: vshuffle x3, x3, x5, r24; vmac.f bmh0, bmh0, x1, x9, r29 |
| 67 | + ; CHECK-NEXT: mov r3, p0; vmac.f bmh3, bmh3, x3, x9, r29 |
| 68 | + ; CHECK-NEXT: vmac.f bmh2, bmh2, x10, x9, r29 |
| 69 | + ; CHECK-NEXT: vmac.f bml3, bml3, x1, x7, r29 |
82 | 70 | ; CHECK-NEXT: vmac.f bml6, bml6, x3, x7, r29 |
| 71 | + ; CHECK-NEXT: vmac.f bml5, bml5, x10, x7, r29 |
| 72 | + ; CHECK-NEXT: vmac.f bmh6, bmh6, x8, x11, r29 |
83 | 73 | ; CHECK-NEXT: vmac.f bmh4, bmh4, x1, x11, r29 |
84 | 74 | ; CHECK-NEXT: vmac.f bml1, bml1, x3, x11, r29 |
85 | 75 | ; CHECK-NEXT: vmac.f bmh8, bmh8, x10, x11, r29 |
86 | | - ; CHECK-NEXT: vmac.f bmh7, bmh7, x8, x5, r29 |
87 | | - ; CHECK-NEXT: vmac.f bmh5, bmh5, x1, x5, r29 |
88 | | - ; CHECK-NEXT: vmac.f bml2, bml2, x3, x5, r29 |
89 | | - ; CHECK-NEXT: vmac.f bml0, bml0, x10, x5, r29 |
90 | 76 | ; CHECK-NEXT: nop |
91 | 77 | ; CHECK-NEXT: nop |
92 | 78 | ; CHECK-NEXT: nop |
|
0 commit comments