|
| 1 | + |
| 2 | +build/matmul_16m_6n_k.bin: file format binary |
| 3 | + |
| 4 | + |
| 5 | +Disassembly of section .data: |
| 6 | + |
| 7 | +0000000000000000 <.data>: |
// ---------------------------------------------------------------------------
// Routine at offset 0x0 (no symbol: this is a raw-binary disassembly).
//
// Straight-line code, no branches. It loads 16 single-precision values from
// [x0] into v0-v3 once, then runs six identical steps. Each step:
//   1. loads one 32-bit scalar from [x1] into s4 and advances x1 by x4,
//   2. loads 16 floats from [x2] into four accumulator registers,
//   3. adds v0..v3 * v4.s[0] to those accumulators (fmla),
//   4. stores them back to [x2] and post-increments x2 by x5.
//
// x3, x4 and x5 are shifted left by 2 on entry, i.e. multiplied by 4.
// Presumably they arrive as element counts and become byte strides for
// 4-byte floats -- TODO confirm. x3 is not read again in this routine.
//
// NOTE(review): the access pattern reads like C[:, j] += A[:, 0] * B[0, j]
// for j = 0..5, with x0 = A, x1 = B, x2 = C and x3/x4/x5 = leading
// dimensions. That is inferred from the file name (matmul_16m_6n_k) and the
// code shape, not stated anywhere in this listing -- confirm with the
// generator.
// ---------------------------------------------------------------------------
// Scale the three stride arguments by 4.
| 8 | + 0: d37ef463 lsl x3, x3, #2 |
| 9 | + 4: d37ef484 lsl x4, x4, #2 |
| 10 | + 8: d37ef4a5 lsl x5, x5, #2 |
// v0-v3 = 16 floats from [x0]; reused unchanged by all six steps below.
| 11 | + c: 4c402800 ld1 {v0.4s-v3.4s}, [x0] |
// Step 1 of 6: accumulators v25-v28.
| 12 | + 10: bd400024 ldr s4, [x1] |
| 13 | + 14: 8b040021 add x1, x1, x4 |
| 14 | + 18: 4c402859 ld1 {v25.4s-v28.4s}, [x2] |
| 15 | + 1c: 4f841019 fmla v25.4s, v0.4s, v4.s[0] |
| 16 | + 20: 4f84103a fmla v26.4s, v1.4s, v4.s[0] |
| 17 | + 24: 4f84105b fmla v27.4s, v2.4s, v4.s[0] |
| 18 | + 28: 4f84107c fmla v28.4s, v3.4s, v4.s[0] |
| 19 | + 2c: 4c852859 st1 {v25.4s-v28.4s}, [x2], x5 |
// Step 2 of 6: accumulators v17-v20.
| 20 | + 30: bd400024 ldr s4, [x1] |
| 21 | + 34: 8b040021 add x1, x1, x4 |
| 22 | + 38: 4c402851 ld1 {v17.4s-v20.4s}, [x2] |
| 23 | + 3c: 4f841011 fmla v17.4s, v0.4s, v4.s[0] |
| 24 | + 40: 4f841032 fmla v18.4s, v1.4s, v4.s[0] |
| 25 | + 44: 4f841053 fmla v19.4s, v2.4s, v4.s[0] |
| 26 | + 48: 4f841074 fmla v20.4s, v3.4s, v4.s[0] |
| 27 | + 4c: 4c852851 st1 {v17.4s-v20.4s}, [x2], x5 |
// Step 3 of 6: accumulators v21-v24.
| 28 | + 50: bd400024 ldr s4, [x1] |
| 29 | + 54: 8b040021 add x1, x1, x4 |
| 30 | + 58: 4c402855 ld1 {v21.4s-v24.4s}, [x2] |
| 31 | + 5c: 4f841015 fmla v21.4s, v0.4s, v4.s[0] |
| 32 | + 60: 4f841036 fmla v22.4s, v1.4s, v4.s[0] |
| 33 | + 64: 4f841057 fmla v23.4s, v2.4s, v4.s[0] |
| 34 | + 68: 4f841078 fmla v24.4s, v3.4s, v4.s[0] |
| 35 | + 6c: 4c852855 st1 {v21.4s-v24.4s}, [x2], x5 |
// Step 4 of 6: v25-v28 again (their step-1 contents were already stored).
| 36 | + 70: bd400024 ldr s4, [x1] |
| 37 | + 74: 8b040021 add x1, x1, x4 |
| 38 | + 78: 4c402859 ld1 {v25.4s-v28.4s}, [x2] |
| 39 | + 7c: 4f841019 fmla v25.4s, v0.4s, v4.s[0] |
| 40 | + 80: 4f84103a fmla v26.4s, v1.4s, v4.s[0] |
| 41 | + 84: 4f84105b fmla v27.4s, v2.4s, v4.s[0] |
| 42 | + 88: 4f84107c fmla v28.4s, v3.4s, v4.s[0] |
| 43 | + 8c: 4c852859 st1 {v25.4s-v28.4s}, [x2], x5 |
// Step 5 of 6: v17-v20 again.
| 44 | + 90: bd400024 ldr s4, [x1] |
| 45 | + 94: 8b040021 add x1, x1, x4 |
| 46 | + 98: 4c402851 ld1 {v17.4s-v20.4s}, [x2] |
| 47 | + 9c: 4f841011 fmla v17.4s, v0.4s, v4.s[0] |
| 48 | + a0: 4f841032 fmla v18.4s, v1.4s, v4.s[0] |
| 49 | + a4: 4f841053 fmla v19.4s, v2.4s, v4.s[0] |
| 50 | + a8: 4f841074 fmla v20.4s, v3.4s, v4.s[0] |
| 51 | + ac: 4c852851 st1 {v17.4s-v20.4s}, [x2], x5 |
// Step 6 of 6: v21-v24 again.
| 52 | + b0: bd400024 ldr s4, [x1] |
| 53 | + b4: 8b040021 add x1, x1, x4 |
| 54 | + b8: 4c402855 ld1 {v21.4s-v24.4s}, [x2] |
| 55 | + bc: 4f841015 fmla v21.4s, v0.4s, v4.s[0] |
| 56 | + c0: 4f841036 fmla v22.4s, v1.4s, v4.s[0] |
| 57 | + c4: 4f841057 fmla v23.4s, v2.4s, v4.s[0] |
| 58 | + c8: 4f841078 fmla v24.4s, v3.4s, v4.s[0] |
| 59 | + cc: 4c852855 st1 {v21.4s-v24.4s}, [x2], x5 |
// Only x0-x5 and v0-v4 / v17-v28 were touched, so nothing is restored here.
| 60 | + d0: d65f03c0 ret |
// ---------------------------------------------------------------------------
// Routine at offset 0xd4 (no symbol: this is a raw-binary disassembly).
//
// Three nested count-down loops around a tile of 4 x 16 floats held in
// v25-v28, v17-v20, v21-v24 and v5-v8:
//   outer  loop, head 0x104, counter x17 = 16
//   middle loop, head 0x114, counter x16 = 4
//   inner  loop, head 0x13c, counter x15 = 1 (body runs exactly once)
// Each counter is decremented at the loop head and tested with cbnz at the
// tail, so a counter of N gives N passes.
//
// Register roles as used below:
//   x0        running pointer for the 16-float ld1 (post-incremented by x3)
//   x1        running pointer for the scalar loads (advanced by x4 each load)
//   x2        running pointer for tile loads/stores (post-incremented by x5)
//   x3/x4/x5  strides, multiplied by 4 on entry
//   x6        base of x1 for the current inner pass; +4 bytes per pass
//   x7, x8    bases for x2 / x0 in the current middle pass; +0x40 per pass
//   x9, x11   bases for x1 / x2 in the current outer pass
//   x10       original x0, never modified; x8 is reset from it at 0x108
//   x12       constant 4, multiplier for the outer-loop advance
//
// NOTE(review): the pattern reads like C += A * B over 16-row by 4-column
// tiles, with x0 = A, x1 = B, x2 = C and x3/x4/x5 = leading dimensions in
// elements (64 x 64 output, K = 1, given the counters above). This is
// inferred from the code shape and file name only -- confirm with the
// generator.
// ---------------------------------------------------------------------------
// v8 is written below, and the low 64 bits of v8-v15 are callee-saved under
// AAPCS64. v9 is not touched in this routine; saving the pair makes the push
// exactly 16 bytes.
| 61 | + d4: 6dbf27e8 stp d8, d9, [sp, #-16]! |
// Scale the three stride arguments by 4.
| 62 | + d8: d37ef463 lsl x3, x3, #2 |
| 63 | + dc: d37ef484 lsl x4, x4, #2 |
| 64 | + e0: d37ef4a5 lsl x5, x5, #2 |
// These three values are overwritten (0x120, 0x10c, 0x108) before any read.
| 65 | + e4: aa0103e6 mov x6, x1 |
| 66 | + e8: aa0203e7 mov x7, x2 |
| 67 | + ec: aa0003e8 mov x8, x0 |
// Keep the incoming x1 / x0 / x2 as loop bases.
| 68 | + f0: aa0103e9 mov x9, x1 |
| 69 | + f4: aa0003ea mov x10, x0 |
| 70 | + f8: aa0203eb mov x11, x2 |
| 71 | + fc: d280008c mov x12, #0x4 // #4 |
// ---- outer loop: 16 passes -------------------------------------------------
| 72 | + 100: d2800211 mov x17, #0x10 // #16 |
| 73 | + 104: d1000631 sub x17, x17, #0x1 |
// Rewind the middle-loop bases: x8 to the original x0, x7 to this pass's x11.
| 74 | + 108: aa0a03e8 mov x8, x10 |
| 75 | + 10c: aa0b03e7 mov x7, x11 |
// ---- middle loop: 4 passes -------------------------------------------------
| 76 | + 110: d2800090 mov x16, #0x4 // #4 |
| 77 | + 114: d1000610 sub x16, x16, #0x1 |
// Working pointers for this tile.
| 78 | + 118: aa0703e2 mov x2, x7 |
| 79 | + 11c: aa0803e0 mov x0, x8 |
| 80 | + 120: aa0903e6 mov x6, x9 |
| 81 | + 124: aa0903e1 mov x1, x9 |
// Load the tile: four groups of 16 floats, x5 bytes apart.
| 82 | + 128: 4cc52859 ld1 {v25.4s-v28.4s}, [x2], x5 |
| 83 | + 12c: 4cc52851 ld1 {v17.4s-v20.4s}, [x2], x5 |
| 84 | + 130: 4cc52855 ld1 {v21.4s-v24.4s}, [x2], x5 |
| 85 | + 134: 4cc52845 ld1 {v5.4s-v8.4s}, [x2], x5 |
// ---- inner loop: counter starts at 1, so the body executes once ------------
| 86 | + 138: d280002f mov x15, #0x1 // #1 |
| 87 | + 13c: d10005ef sub x15, x15, #0x1 |
// v0-v3 = 16 floats from [x0]; x0 then advances by x3.
| 88 | + 140: 4cc32800 ld1 {v0.4s-v3.4s}, [x0], x3 |
// Group 1: v25-v28 += v0-v3 * scalar at [x1].
| 89 | + 144: bd400024 ldr s4, [x1] |
| 90 | + 148: 8b040021 add x1, x1, x4 |
| 91 | + 14c: 4f841019 fmla v25.4s, v0.4s, v4.s[0] |
| 92 | + 150: 4f84103a fmla v26.4s, v1.4s, v4.s[0] |
| 93 | + 154: 4f84105b fmla v27.4s, v2.4s, v4.s[0] |
| 94 | + 158: 4f84107c fmla v28.4s, v3.4s, v4.s[0] |
// Group 2: v17-v20, scalar x4 bytes further on.
| 95 | + 15c: bd400024 ldr s4, [x1] |
| 96 | + 160: 8b040021 add x1, x1, x4 |
| 97 | + 164: 4f841011 fmla v17.4s, v0.4s, v4.s[0] |
| 98 | + 168: 4f841032 fmla v18.4s, v1.4s, v4.s[0] |
| 99 | + 16c: 4f841053 fmla v19.4s, v2.4s, v4.s[0] |
| 100 | + 170: 4f841074 fmla v20.4s, v3.4s, v4.s[0] |
// Group 3: v21-v24.
| 101 | + 174: bd400024 ldr s4, [x1] |
| 102 | + 178: 8b040021 add x1, x1, x4 |
| 103 | + 17c: 4f841015 fmla v21.4s, v0.4s, v4.s[0] |
| 104 | + 180: 4f841036 fmla v22.4s, v1.4s, v4.s[0] |
| 105 | + 184: 4f841057 fmla v23.4s, v2.4s, v4.s[0] |
| 106 | + 188: 4f841078 fmla v24.4s, v3.4s, v4.s[0] |
// Group 4: v5-v8 (v8 is why d8 was saved on entry).
| 107 | + 18c: bd400024 ldr s4, [x1] |
| 108 | + 190: 8b040021 add x1, x1, x4 |
| 109 | + 194: 4f841005 fmla v5.4s, v0.4s, v4.s[0] |
| 110 | + 198: 4f841026 fmla v6.4s, v1.4s, v4.s[0] |
| 111 | + 19c: 4f841047 fmla v7.4s, v2.4s, v4.s[0] |
| 112 | + 1a0: 4f841068 fmla v8.4s, v3.4s, v4.s[0] |
// Move the scalar base on by one 4-byte element and restart x1 from it.
| 113 | + 1a4: 910010c6 add x6, x6, #0x4 |
| 114 | + 1a8: aa0603e1 mov x1, x6 |
| 115 | + 1ac: b5fffc8f cbnz x15, 0x13c |
// Rewind x2 to the tile base and write the four groups back.
| 116 | + 1b0: aa0703e2 mov x2, x7 |
| 117 | + 1b4: 4c852859 st1 {v25.4s-v28.4s}, [x2], x5 |
| 118 | + 1b8: 4c852851 st1 {v17.4s-v20.4s}, [x2], x5 |
| 119 | + 1bc: 4c852855 st1 {v21.4s-v24.4s}, [x2], x5 |
| 120 | + 1c0: 4c852845 st1 {v5.4s-v8.4s}, [x2], x5 |
// Next tile in the middle loop: both bases move 0x40 bytes (16 floats).
| 121 | + 1c4: 910100e7 add x7, x7, #0x40 |
| 122 | + 1c8: 91010108 add x8, x8, #0x40 |
| 123 | + 1cc: b5fffa50 cbnz x16, 0x114 |
// Next outer pass: x9 += 4 * x4 and x11 += 4 * x5 (x12 holds 4).
| 124 | + 1d0: 9b0c2489 madd x9, x4, x12, x9 |
| 125 | + 1d4: 9b0c2cab madd x11, x5, x12, x11 |
| 126 | + 1d8: b5fff971 cbnz x17, 0x104 |
// Restore the callee-saved d8/d9 pushed at 0xd4 and return.
| 127 | + 1dc: 6cc127e8 ldp d8, d9, [sp], #16 |
| 128 | + 1e0: d65f03c0 ret |
0 commit comments