@@ -86,47 +86,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8686.macro COPY4x8
8787 ldr q0 , [ A01 ], # 16
8888 ldr q1 , [ A02 ], # 16
89- ins v8.s [ 0 ], v0.s [ 0 ]
90- ins v10.s [ 0 ], v0.s [ 1 ]
91- ins v12.s [ 0 ], v0.s [ 2 ]
92- ins v14.s [ 0 ], v0.s [ 3 ]
93- ins v8.s [ 1 ], v1.s [ 0 ]
94- ins v10.s [ 1 ], v1.s [ 1 ]
95- ins v12.s [ 1 ], v1.s [ 2 ]
96- ins v14.s [ 1 ], v1.s [ 3 ]
97-
9889 ldr q2 , [ A03 ], # 16
9990 ldr q3 , [ A04 ], # 16
100- ins v8.s [ 2 ], v2.s [ 0 ]
101- ins v10.s [ 2 ], v2.s [ 1 ]
102- ins v12.s [ 2 ], v2.s [ 2 ]
103- ins v14.s [ 2 ], v2.s [ 3 ]
104- ins v8.s [ 3 ], v3.s [ 0 ]
105- ins v10.s [ 3 ], v3.s [ 1 ]
106- ins v12.s [ 3 ], v3.s [ 2 ]
107- ins v14.s [ 3 ], v3.s [ 3 ]
91+
92+ zip1 v16.4s , v0.4s , v1.4s
93+ zip1 v17.4s , v2.4s , v3.4s
94+ zip2 v18.4s , v0.4s , v1.4s
95+ zip2 v19.4s , v2.4s , v3.4s
96+
97+ zip1 v8.2d , v16.2d , v17.2d
98+ zip2 v10.2d , v16.2d , v17.2d
99+ zip1 v12.2d , v18.2d , v19.2d
100+ zip2 v14.2d , v18.2d , v19.2d
108101
109102 ldr q4 , [ A05 ], # 16
110103 ldr q5 , [ A06 ], # 16
111- ins v9.s [ 0 ], v4.s [ 0 ]
112- ins v11.s [ 0 ], v4.s [ 1 ]
113- ins v13.s [ 0 ], v4.s [ 2 ]
114- ins v15.s [ 0 ], v4.s [ 3 ]
115- ins v9.s [ 1 ], v5.s [ 0 ]
116- ins v11.s [ 1 ], v5.s [ 1 ]
117- ins v13.s [ 1 ], v5.s [ 2 ]
118- ins v15.s [ 1 ], v5.s [ 3 ]
119-
120104 ldr q6 , [ A07 ], # 16
121105 ldr q7 , [ A08 ], # 16
122- ins v9.s [ 2 ], v6.s [ 0 ]
123- ins v11.s [ 2 ], v6.s [ 1 ]
124- ins v13.s [ 2 ], v6.s [ 2 ]
125- ins v15.s [ 2 ], v6.s [ 3 ]
126- ins v9.s [ 3 ], v7.s [ 0 ]
127- ins v11.s [ 3 ], v7.s [ 1 ]
128- ins v13.s [ 3 ], v7.s [ 2 ]
129- ins v15.s [ 3 ], v7.s [ 3 ]
106+
107+ zip1 v16.4s , v4.4s , v5.4s
108+ zip1 v17.4s , v6.4s , v7.4s
109+ zip2 v18.4s , v4.4s , v5.4s
110+ zip2 v19.4s , v6.4s , v7.4s
111+
112+ zip1 v9.2d , v16.2d , v17.2d
113+ zip2 v11.2d , v16.2d , v17.2d
114+ zip1 v13.2d , v18.2d , v19.2d
115+ zip2 v15.2d , v18.2d , v19.2d
130116
131117 st1 {v8.4s , v9.4s , v10.4s , v11.4s} , [ B00 ], # 64
132118 st1 {v12.4s , v13.4s , v14.4s , v15.4s} , [ B00 ], # 64
@@ -135,31 +121,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// COPY2x8
//   Reads two 32-bit elements from each of the eight sources A01..A08 and
//   writes them to B00 grouped by element index:
//       B00[ 0.. 7] = A01[0], A02[0], ..., A08[0]
//       B00[ 8..15] = A01[1], A02[1], ..., A08[1]
//   Each A0x is post-incremented by 8 bytes, B00 by 64 bytes.
//   Clobbers: v0-v13 (v12/v13 are scratch only).
//   NOTE(review): A01..A08 and B00 are defined outside this chunk;
//   presumably register aliases -- confirm.
//   NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64;
//   verify that the enclosing routine saves/restores them.
.macro COPY2x8
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8
	ldr	d2, [A03], #8
	ldr	d3, [A04], #8

	// zip1 on .4s only consumes the low 64 bits of each operand, i.e.
	// exactly the two elements loaded by "ldr d".
	zip1	v12.4s, v0.4s, v1.4s		// A01[0] A02[0] A01[1] A02[1]
	zip1	v13.4s, v2.4s, v3.4s		// A03[0] A04[0] A03[1] A04[1]

	zip1	v8.2d, v12.2d, v13.2d		// element 0 of A01..A04
	zip2	v10.2d, v12.2d, v13.2d		// element 1 of A01..A04

	ldr	d4, [A05], #8
	ldr	d5, [A06], #8
	ldr	d6, [A07], #8
	ldr	d7, [A08], #8

	zip1	v12.4s, v4.4s, v5.4s		// A05[0] A06[0] A05[1] A06[1]
	zip1	v13.4s, v6.4s, v7.4s		// A07[0] A08[0] A07[1] A08[1]

	zip1	v9.2d, v12.2d, v13.2d		// element 0 of A05..A08
	zip2	v11.2d, v12.2d, v13.2d		// element 1 of A05..A08

	// Register order v8,v9,v10,v11 yields: element 0 of all eight sources,
	// then element 1 of all eight sources.
	st1	{v8.4s, v9.4s, v10.4s, v11.4s}, [B00], #64
.endm
@@ -191,43 +171,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// COPY4x4
//   Reads four 32-bit elements from each of the four sources A01..A04 and
//   writes the 4x4 transpose to B00:
//       B00[4*j + k] = A0(k+1)[j]      for j, k in 0..3
//   Each A0x is post-incremented by 16 bytes, B00 by 64 bytes.
//   Clobbers: v0-v3, v8-v15 (v12-v15 are scratch only).
//   NOTE(review): A01..A04 and B00 are defined outside this chunk;
//   presumably register aliases -- confirm.
//   NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64;
//   verify that the enclosing routine saves/restores them.
.macro COPY4x4
	ldr	q0, [A01], #16
	ldr	q1, [A02], #16
	ldr	q2, [A03], #16
	ldr	q3, [A04], #16

	// Stage 1: interleave 32-bit lanes of source pairs.
	zip1	v12.4s, v0.4s, v1.4s		// A01[0] A02[0] A01[1] A02[1]
	zip1	v13.4s, v2.4s, v3.4s		// A03[0] A04[0] A03[1] A04[1]
	zip2	v14.4s, v0.4s, v1.4s		// A01[2] A02[2] A01[3] A02[3]
	zip2	v15.4s, v2.4s, v3.4s		// A03[2] A04[2] A03[3] A04[3]

	// Stage 2: interleave 64-bit halves to finish the transpose.
	zip1	v8.2d, v12.2d, v13.2d		// element 0 of A01..A04
	zip2	v9.2d, v12.2d, v13.2d		// element 1 of A01..A04
	zip1	v10.2d, v14.2d, v15.2d		// element 2 of A01..A04
	zip2	v11.2d, v14.2d, v15.2d		// element 3 of A01..A04

	st1	{v8.4s, v9.4s, v10.4s, v11.4s}, [B00], #64
.endm
216189
// COPY2x4
//   Reads two 32-bit elements from each of the four sources A01..A04 and
//   writes them to B00 grouped by element index:
//       B00[0..3] = A01[0], A02[0], A03[0], A04[0]
//       B00[4..7] = A01[1], A02[1], A03[1], A04[1]
//   Each A0x is post-incremented by 8 bytes, B00 by 32 bytes.
//   Clobbers: v0-v3, v8-v11 (v10/v11 are scratch only).
//   NOTE(review): A01..A04 and B00 are defined outside this chunk;
//   presumably register aliases -- confirm.
//   NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64;
//   verify that the enclosing routine saves/restores them.
.macro COPY2x4
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8
	ldr	d2, [A03], #8
	ldr	d3, [A04], #8

	// zip1 on .4s only consumes the low 64 bits of each operand, i.e.
	// exactly the two elements loaded by "ldr d".
	zip1	v10.4s, v0.4s, v1.4s		// A01[0] A02[0] A01[1] A02[1]
	zip1	v11.4s, v2.4s, v3.4s		// A03[0] A04[0] A03[1] A04[1]

	zip1	v8.2d, v10.2d, v11.2d		// element 0 of A01..A04
	zip2	v9.2d, v10.2d, v11.2d		// element 1 of A01..A04

	st1	{v8.4s, v9.4s}, [B00], #32
.endm
@@ -249,25 +219,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// COPY4x2
//   Reads four 32-bit elements from each of the two sources A01 and A02 and
//   writes them to B00 interleaved:
//       B00[0..7] = A01[0], A02[0], A01[1], A02[1], ..., A01[3], A02[3]
//   A01 and A02 are post-incremented by 16 bytes, B00 by 32 bytes.
//   Clobbers: v0, v1, v8-v13 (v12/v13 are scratch only).
//   NOTE(review): A01, A02 and B00 are defined outside this chunk;
//   presumably register aliases -- confirm.
//   NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64;
//   verify that the enclosing routine saves/restores them.
.macro COPY4x2
	ldr	q0, [A01], #16
	ldr	q1, [A02], #16

	zip1	v12.4s, v0.4s, v1.4s		// A01[0] A02[0] A01[1] A02[1]
	zip2	v13.4s, v0.4s, v1.4s		// A01[2] A02[2] A01[3] A02[3]

	// Spread each 64-bit pair into its own register. dup fills both halves,
	// but the .2s store below only writes the low 64 bits of each register.
	// NOTE(review): v12 and v13 already hold the 32 bytes in store order, so
	// "st1 {v12.4s, v13.4s}" would write the same data without the dups;
	// left as-is because that would change which registers are clobbered.
	dup	v8.2d, v12.d[0]			// A01[0] A02[0]
	dup	v9.2d, v12.d[1]			// A01[1] A02[1]
	dup	v10.2d, v13.d[0]		// A01[2] A02[2]
	dup	v11.2d, v13.d[1]		// A01[3] A02[3]

	st1	{v8.2s, v9.2s, v10.2s, v11.2s}, [B00], #32
.endm
263233
// COPY2x2
//   Reads two 32-bit elements from each of the two sources A01 and A02 and
//   writes the 2x2 transpose to B00:
//       B00[0..3] = A01[0], A02[0], A01[1], A02[1]
//   A01 and A02 are post-incremented by 8 bytes, B00 by 16 bytes.
//   Clobbers: v0, v1, v8, v9.
//   NOTE(review): A01, A02 and B00 are defined outside this chunk;
//   presumably register aliases -- confirm.
//   NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64;
//   verify that the enclosing routine saves/restores them.
.macro COPY2x2
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8

	zip1	v8.2s, v0.2s, v1.2s		// A01[0] A02[0]
	zip2	v9.2s, v0.2s, v1.2s		// A01[1] A02[1]

	st1	{v8.2s, v9.2s}, [B00], #16
.endm
0 commit comments