@@ -185,3 +185,178 @@ define void @nxv2i64(ptr %ldptr, ptr %stptr) {
185185 store <vscale x 2 x i64 > %x , ptr %stoff , align 8
186186 ret void
187187}
188+
; Test: load a <vscale x 4 x i8> from %ldptr + 32 bytes and store it to
; %stptr + 32 bytes; both accesses are marked align 1.
; The expected assembly differs per check prefix: the 256, 512 and 1024
; variants fold the offset into an immediate "#N, mul vl" address (N = 4, 2, 1),
; while the unsuffixed, 128 and 2048 variants materialise 32 in w8 and use a
; register-offset address instead.
; NOTE(review): the numeric suffix of each prefix is presumably the SVE vector
; length in bits selected by RUN lines outside this view - confirm.
189+ define void @nxv4i8 (ptr %ldptr , ptr %stptr ) {
190+ ; CHECK-LABEL: nxv4i8:
191+ ; CHECK: // %bb.0:
192+ ; CHECK-NEXT: ptrue p0.s
193+ ; CHECK-NEXT: mov w8, #32 // =0x20
194+ ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, x8]
195+ ; CHECK-NEXT: st1b { z0.s }, p0, [x1, x8]
196+ ; CHECK-NEXT: ret
197+ ;
198+ ; CHECK-128-LABEL: nxv4i8:
199+ ; CHECK-128: // %bb.0:
200+ ; CHECK-128-NEXT: ptrue p0.s
201+ ; CHECK-128-NEXT: mov w8, #32 // =0x20
202+ ; CHECK-128-NEXT: ld1b { z0.s }, p0/z, [x0, x8]
203+ ; CHECK-128-NEXT: st1b { z0.s }, p0, [x1, x8]
204+ ; CHECK-128-NEXT: ret
205+ ;
206+ ; CHECK-256-LABEL: nxv4i8:
207+ ; CHECK-256: // %bb.0:
208+ ; CHECK-256-NEXT: ptrue p0.s
209+ ; CHECK-256-NEXT: ld1b { z0.s }, p0/z, [x0, #4, mul vl]
210+ ; CHECK-256-NEXT: st1b { z0.s }, p0, [x1, #4, mul vl]
211+ ; CHECK-256-NEXT: ret
212+ ;
213+ ; CHECK-512-LABEL: nxv4i8:
214+ ; CHECK-512: // %bb.0:
215+ ; CHECK-512-NEXT: ptrue p0.s
216+ ; CHECK-512-NEXT: ld1b { z0.s }, p0/z, [x0, #2, mul vl]
217+ ; CHECK-512-NEXT: st1b { z0.s }, p0, [x1, #2, mul vl]
218+ ; CHECK-512-NEXT: ret
219+ ;
220+ ; CHECK-1024-LABEL: nxv4i8:
221+ ; CHECK-1024: // %bb.0:
222+ ; CHECK-1024-NEXT: ptrue p0.s
223+ ; CHECK-1024-NEXT: ld1b { z0.s }, p0/z, [x0, #1, mul vl]
224+ ; CHECK-1024-NEXT: st1b { z0.s }, p0, [x1, #1, mul vl]
225+ ; CHECK-1024-NEXT: ret
226+ ;
227+ ; CHECK-2048-LABEL: nxv4i8:
228+ ; CHECK-2048: // %bb.0:
229+ ; CHECK-2048-NEXT: ptrue p0.s
230+ ; CHECK-2048-NEXT: mov w8, #32 // =0x20
231+ ; CHECK-2048-NEXT: ld1b { z0.s }, p0/z, [x0, x8]
232+ ; CHECK-2048-NEXT: st1b { z0.s }, p0, [x1, x8]
233+ ; CHECK-2048-NEXT: ret
; Apply the same fixed 32-byte offset to both pointers via i8 GEPs, then copy
; one scalable vector from the load address to the store address.
234+ %ldoff = getelementptr inbounds nuw i8 , ptr %ldptr , i64 32
235+ %stoff = getelementptr inbounds nuw i8 , ptr %stptr , i64 32
236+ %x = load <vscale x 4 x i8 >, ptr %ldoff , align 1
237+ store <vscale x 4 x i8 > %x , ptr %stoff , align 1
238+ ret void
239+ }
240+
; Test: load a <vscale x 2 x float> from %ldptr + 64 bytes and store it to
; %stptr + 64 bytes; both accesses are marked align 1.
; The expected assembly differs per check prefix: the 256, 512 and 1024
; variants use an immediate "#N, mul vl" address (N = 4, 2, 1), while the
; unsuffixed, 128 and 2048 variants load 16 into x8 and address with
; "x8, lsl #2" (16 shifted left by 2 is the 64-byte offset).
; NOTE(review): the numeric suffix of each prefix is presumably the SVE vector
; length in bits selected by RUN lines outside this view - confirm.
241+ define void @nxv2f32 (ptr %ldptr , ptr %stptr ) {
242+ ; CHECK-LABEL: nxv2f32:
243+ ; CHECK: // %bb.0:
244+ ; CHECK-NEXT: ptrue p0.d
245+ ; CHECK-NEXT: mov x8, #16 // =0x10
246+ ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
247+ ; CHECK-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2]
248+ ; CHECK-NEXT: ret
249+ ;
250+ ; CHECK-128-LABEL: nxv2f32:
251+ ; CHECK-128: // %bb.0:
252+ ; CHECK-128-NEXT: ptrue p0.d
253+ ; CHECK-128-NEXT: mov x8, #16 // =0x10
254+ ; CHECK-128-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
255+ ; CHECK-128-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2]
256+ ; CHECK-128-NEXT: ret
257+ ;
258+ ; CHECK-256-LABEL: nxv2f32:
259+ ; CHECK-256: // %bb.0:
260+ ; CHECK-256-NEXT: ptrue p0.d
261+ ; CHECK-256-NEXT: ld1w { z0.d }, p0/z, [x0, #4, mul vl]
262+ ; CHECK-256-NEXT: st1w { z0.d }, p0, [x1, #4, mul vl]
263+ ; CHECK-256-NEXT: ret
264+ ;
265+ ; CHECK-512-LABEL: nxv2f32:
266+ ; CHECK-512: // %bb.0:
267+ ; CHECK-512-NEXT: ptrue p0.d
268+ ; CHECK-512-NEXT: ld1w { z0.d }, p0/z, [x0, #2, mul vl]
269+ ; CHECK-512-NEXT: st1w { z0.d }, p0, [x1, #2, mul vl]
270+ ; CHECK-512-NEXT: ret
271+ ;
272+ ; CHECK-1024-LABEL: nxv2f32:
273+ ; CHECK-1024: // %bb.0:
274+ ; CHECK-1024-NEXT: ptrue p0.d
275+ ; CHECK-1024-NEXT: ld1w { z0.d }, p0/z, [x0, #1, mul vl]
276+ ; CHECK-1024-NEXT: st1w { z0.d }, p0, [x1, #1, mul vl]
277+ ; CHECK-1024-NEXT: ret
278+ ;
279+ ; CHECK-2048-LABEL: nxv2f32:
280+ ; CHECK-2048: // %bb.0:
281+ ; CHECK-2048-NEXT: ptrue p0.d
282+ ; CHECK-2048-NEXT: mov x8, #16 // =0x10
283+ ; CHECK-2048-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
284+ ; CHECK-2048-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2]
285+ ; CHECK-2048-NEXT: ret
; Apply the same fixed 64-byte offset to both pointers via i8 GEPs, then copy
; one scalable vector from the load address to the store address.
286+ %ldoff = getelementptr inbounds nuw i8 , ptr %ldptr , i64 64
287+ %stoff = getelementptr inbounds nuw i8 , ptr %stptr , i64 64
288+ %x = load <vscale x 2 x float >, ptr %ldoff , align 1
289+ store <vscale x 2 x float > %x , ptr %stoff , align 1
290+ ret void
291+ }
292+
; Test: load a <vscale x 4 x double> from %ldptr + 128 bytes and store it to
; %stptr + 128 bytes; both accesses are marked align 1.
; Every expected sequence moves the value through two Z registers (z0 and z1).
; In the 128, 256, 512 and 1024 variants one half is addressed directly off
; x0/x1 with "#N, mul vl" (N = 8, 4, 2, 1) and the other half off base + 128
; with "#1, mul vl". In the unsuffixed and 2048 variants the first half uses
; predicated ld1d/st1d with a register offset (16 in x8, "lsl #3", i.e. 128
; bytes) and the second half uses ldr/str off base + 128 with "#1, mul vl".
; NOTE(review): the numeric suffix of each prefix is presumably the SVE vector
; length in bits selected by RUN lines outside this view - confirm.
293+ define void @nxv4f64 (ptr %ldptr , ptr %stptr ) {
294+ ; CHECK-LABEL: nxv4f64:
295+ ; CHECK: // %bb.0:
296+ ; CHECK-NEXT: ptrue p0.d
297+ ; CHECK-NEXT: mov x8, #16 // =0x10
298+ ; CHECK-NEXT: add x9, x0, #128
299+ ; CHECK-NEXT: ldr z1, [x9, #1, mul vl]
300+ ; CHECK-NEXT: add x9, x1, #128
301+ ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
302+ ; CHECK-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
303+ ; CHECK-NEXT: str z1, [x9, #1, mul vl]
304+ ; CHECK-NEXT: ret
305+ ;
306+ ; CHECK-128-LABEL: nxv4f64:
307+ ; CHECK-128: // %bb.0:
308+ ; CHECK-128-NEXT: add x8, x0, #128
309+ ; CHECK-128-NEXT: ldr z1, [x0, #8, mul vl]
310+ ; CHECK-128-NEXT: ldr z0, [x8, #1, mul vl]
311+ ; CHECK-128-NEXT: add x8, x1, #128
312+ ; CHECK-128-NEXT: str z0, [x8, #1, mul vl]
313+ ; CHECK-128-NEXT: str z1, [x1, #8, mul vl]
314+ ; CHECK-128-NEXT: ret
315+ ;
316+ ; CHECK-256-LABEL: nxv4f64:
317+ ; CHECK-256: // %bb.0:
318+ ; CHECK-256-NEXT: add x8, x0, #128
319+ ; CHECK-256-NEXT: ldr z1, [x0, #4, mul vl]
320+ ; CHECK-256-NEXT: ldr z0, [x8, #1, mul vl]
321+ ; CHECK-256-NEXT: add x8, x1, #128
322+ ; CHECK-256-NEXT: str z0, [x8, #1, mul vl]
323+ ; CHECK-256-NEXT: str z1, [x1, #4, mul vl]
324+ ; CHECK-256-NEXT: ret
325+ ;
326+ ; CHECK-512-LABEL: nxv4f64:
327+ ; CHECK-512: // %bb.0:
328+ ; CHECK-512-NEXT: add x8, x0, #128
329+ ; CHECK-512-NEXT: ldr z1, [x0, #2, mul vl]
330+ ; CHECK-512-NEXT: ldr z0, [x8, #1, mul vl]
331+ ; CHECK-512-NEXT: add x8, x1, #128
332+ ; CHECK-512-NEXT: str z0, [x8, #1, mul vl]
333+ ; CHECK-512-NEXT: str z1, [x1, #2, mul vl]
334+ ; CHECK-512-NEXT: ret
335+ ;
336+ ; CHECK-1024-LABEL: nxv4f64:
337+ ; CHECK-1024: // %bb.0:
338+ ; CHECK-1024-NEXT: add x8, x0, #128
339+ ; CHECK-1024-NEXT: ldr z1, [x0, #1, mul vl]
340+ ; CHECK-1024-NEXT: ldr z0, [x8, #1, mul vl]
341+ ; CHECK-1024-NEXT: add x8, x1, #128
342+ ; CHECK-1024-NEXT: str z0, [x8, #1, mul vl]
343+ ; CHECK-1024-NEXT: str z1, [x1, #1, mul vl]
344+ ; CHECK-1024-NEXT: ret
345+ ;
346+ ; CHECK-2048-LABEL: nxv4f64:
347+ ; CHECK-2048: // %bb.0:
348+ ; CHECK-2048-NEXT: ptrue p0.d
349+ ; CHECK-2048-NEXT: mov x8, #16 // =0x10
350+ ; CHECK-2048-NEXT: add x9, x0, #128
351+ ; CHECK-2048-NEXT: ldr z1, [x9, #1, mul vl]
352+ ; CHECK-2048-NEXT: add x9, x1, #128
353+ ; CHECK-2048-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
354+ ; CHECK-2048-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
355+ ; CHECK-2048-NEXT: str z1, [x9, #1, mul vl]
356+ ; CHECK-2048-NEXT: ret
; Apply the same fixed 128-byte offset to both pointers via i8 GEPs, then copy
; one scalable vector from the load address to the store address.
357+ %ldoff = getelementptr inbounds nuw i8 , ptr %ldptr , i64 128
358+ %stoff = getelementptr inbounds nuw i8 , ptr %stptr , i64 128
359+ %x = load <vscale x 4 x double >, ptr %ldoff , align 1
360+ store <vscale x 4 x double > %x , ptr %stoff , align 1
361+ ret void
362+ }
0 commit comments