@@ -285,8 +285,8 @@ define void @nxv2f32(ptr %ldptr, ptr %stptr) {
285285; CHECK-2048-NEXT: ret
286286 %ldoff = getelementptr inbounds nuw i8 , ptr %ldptr , i64 64
287287 %stoff = getelementptr inbounds nuw i8 , ptr %stptr , i64 64
288- %x = load <vscale x 2 x float >, ptr %ldoff , align 1
289- store <vscale x 2 x float > %x , ptr %stoff , align 1
288+ %x = load <vscale x 2 x float >, ptr %ldoff , align 4
289+ store <vscale x 2 x float > %x , ptr %stoff , align 4
290290 ret void
291291}
292292
@@ -356,7 +356,116 @@ define void @nxv4f64(ptr %ldptr, ptr %stptr) {
356356; CHECK-2048-NEXT: ret
357357 %ldoff = getelementptr inbounds nuw i8 , ptr %ldptr , i64 128
358358 %stoff = getelementptr inbounds nuw i8 , ptr %stptr , i64 128
359- %x = load <vscale x 4 x double >, ptr %ldoff , align 1
360- store <vscale x 4 x double > %x , ptr %stoff , align 1
359+ %x = load <vscale x 4 x double >, ptr %ldoff , align 8
360+ store <vscale x 4 x double > %x , ptr %stoff , align 8
361+ ret void
362+ }
363+
; Copies a fixed-length vector from %ldptr + 32 bytes to %stptr + 32 bytes,
; with both the load and the store marked align 4.
; NOTE(review): the function is named v8i32, but the load/store below operate
; on <16 x i32>; the two q-register pairs expected by the default and
; CHECK-128 prefixes match that wider access. Confirm the name is intended.
; Expected addressing, as recorded in the CHECK lines:
;   - default / CHECK-128: ldp/stp of q-register pairs at #64 and #32;
;   - CHECK-256: two ld1w/st1w, one indexed by x8 (= 8) with lsl #2 and one
;     at #1, mul vl;
;   - CHECK-512 and wider: a single ld1w/st1w indexed by x8 (= 8) with lsl #2.
; NOTE(review): the CHECK-<N> prefixes presumably map to RUN lines that are
; outside this view - verify against the top of the test file.
364+ define void @v8i32 (ptr %ldptr , ptr %stptr ) {
365+ ; CHECK-LABEL: v8i32:
366+ ; CHECK: // %bb.0:
367+ ; CHECK-NEXT: ldp q0, q1, [x0, #64]
368+ ; CHECK-NEXT: ldp q3, q2, [x0, #32]
369+ ; CHECK-NEXT: stp q0, q1, [x1, #64]
370+ ; CHECK-NEXT: stp q3, q2, [x1, #32]
371+ ; CHECK-NEXT: ret
372+ ;
373+ ; CHECK-128-LABEL: v8i32:
374+ ; CHECK-128: // %bb.0:
375+ ; CHECK-128-NEXT: ldp q0, q1, [x0, #64]
376+ ; CHECK-128-NEXT: ldp q3, q2, [x0, #32]
377+ ; CHECK-128-NEXT: stp q0, q1, [x1, #64]
378+ ; CHECK-128-NEXT: stp q3, q2, [x1, #32]
379+ ; CHECK-128-NEXT: ret
380+ ;
381+ ; CHECK-256-LABEL: v8i32:
382+ ; CHECK-256: // %bb.0:
383+ ; CHECK-256-NEXT: ptrue p0.s
384+ ; CHECK-256-NEXT: mov x8, #8 // =0x8
385+ ; CHECK-256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
386+ ; CHECK-256-NEXT: ld1w { z1.s }, p0/z, [x0, #1, mul vl]
387+ ; CHECK-256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
388+ ; CHECK-256-NEXT: st1w { z1.s }, p0, [x1, #1, mul vl]
389+ ; CHECK-256-NEXT: ret
390+ ;
391+ ; CHECK-512-LABEL: v8i32:
392+ ; CHECK-512: // %bb.0:
393+ ; CHECK-512-NEXT: ptrue p0.s
394+ ; CHECK-512-NEXT: mov x8, #8 // =0x8
395+ ; CHECK-512-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
396+ ; CHECK-512-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
397+ ; CHECK-512-NEXT: ret
398+ ;
399+ ; CHECK-1024-LABEL: v8i32:
400+ ; CHECK-1024: // %bb.0:
401+ ; CHECK-1024-NEXT: ptrue p0.s, vl16
402+ ; CHECK-1024-NEXT: mov x8, #8 // =0x8
403+ ; CHECK-1024-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
404+ ; CHECK-1024-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
405+ ; CHECK-1024-NEXT: ret
406+ ;
407+ ; CHECK-2048-LABEL: v8i32:
408+ ; CHECK-2048: // %bb.0:
409+ ; CHECK-2048-NEXT: ptrue p0.s, vl16
410+ ; CHECK-2048-NEXT: mov x8, #8 // =0x8
411+ ; CHECK-2048-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
412+ ; CHECK-2048-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
413+ ; CHECK-2048-NEXT: ret
414+ %ldoff = getelementptr inbounds nuw i8 , ptr %ldptr , i64 32 ; i8-typed GEP: %ldptr + 32 bytes
415+ %stoff = getelementptr inbounds nuw i8 , ptr %stptr , i64 32 ; i8-typed GEP: %stptr + 32 bytes
416+ %x = load <16 x i32 >, ptr %ldoff , align 4 ; 16 lanes of i32, element-aligned only
417+ store <16 x i32 > %x , ptr %stoff , align 4 ; store the same value unchanged
418+ ret void
419+ }
420+
421+ ; FIXME: This is wrong for VLS (the fixed-vector-length CHECK-256 .. CHECK-2048 expectations below).
; Stores a <8 x i32> splat of 1 (align 4) at %0 + (vscale << 5) bytes, i.e. a
; vscale-dependent byte offset applied to a fixed-length vector store.
; The default and CHECK-128 expectations form the address with `rdvl x8, #2`
; followed by an add, whereas the CHECK-256 and wider expectations fold the
; offset into `[x0, #1, mul vl]`.
; NOTE(review): presumably that differing multiplier (#2 vs #1) is the
; miscompile the FIXME refers to - confirm before relying on these CHECK lines
; as the desired output.
422+ define void @v8i32_vscale (ptr %0 ) {
423+ ; CHECK-LABEL: v8i32_vscale:
424+ ; CHECK: // %bb.0:
425+ ; CHECK-NEXT: movi v0.4s, #1
426+ ; CHECK-NEXT: rdvl x8, #2
427+ ; CHECK-NEXT: add x8, x0, x8
428+ ; CHECK-NEXT: stp q0, q0, [x8]
429+ ; CHECK-NEXT: ret
430+ ;
431+ ; CHECK-128-LABEL: v8i32_vscale:
432+ ; CHECK-128: // %bb.0:
433+ ; CHECK-128-NEXT: movi v0.4s, #1
434+ ; CHECK-128-NEXT: rdvl x8, #2
435+ ; CHECK-128-NEXT: add x8, x0, x8
436+ ; CHECK-128-NEXT: stp q0, q0, [x8]
437+ ; CHECK-128-NEXT: ret
438+ ;
439+ ; CHECK-256-LABEL: v8i32_vscale:
440+ ; CHECK-256: // %bb.0:
441+ ; CHECK-256-NEXT: mov z0.s, #1 // =0x1
442+ ; CHECK-256-NEXT: ptrue p0.s
443+ ; CHECK-256-NEXT: st1w { z0.s }, p0, [x0, #1, mul vl]
444+ ; CHECK-256-NEXT: ret
445+ ;
446+ ; CHECK-512-LABEL: v8i32_vscale:
447+ ; CHECK-512: // %bb.0:
448+ ; CHECK-512-NEXT: mov z0.s, #1 // =0x1
449+ ; CHECK-512-NEXT: ptrue p0.s, vl8
450+ ; CHECK-512-NEXT: st1w { z0.s }, p0, [x0, #1, mul vl]
451+ ; CHECK-512-NEXT: ret
452+ ;
453+ ; CHECK-1024-LABEL: v8i32_vscale:
454+ ; CHECK-1024: // %bb.0:
455+ ; CHECK-1024-NEXT: mov z0.s, #1 // =0x1
456+ ; CHECK-1024-NEXT: ptrue p0.s, vl8
457+ ; CHECK-1024-NEXT: st1w { z0.s }, p0, [x0, #1, mul vl]
458+ ; CHECK-1024-NEXT: ret
459+ ;
460+ ; CHECK-2048-LABEL: v8i32_vscale:
461+ ; CHECK-2048: // %bb.0:
462+ ; CHECK-2048-NEXT: mov z0.s, #1 // =0x1
463+ ; CHECK-2048-NEXT: ptrue p0.s, vl8
464+ ; CHECK-2048-NEXT: st1w { z0.s }, p0, [x0, #1, mul vl]
465+ ; CHECK-2048-NEXT: ret
466+ %vl = call i64 @llvm.vscale () ; runtime vscale value
467+ %vlx = shl i64 %vl , 5 ; byte offset = vscale * 32
468+ %2 = getelementptr inbounds nuw i8 , ptr %0 , i64 %vlx ; i8-typed GEP: %0 + %vlx bytes
469+ store <8 x i32 > splat (i32 1 ), ptr %2 , align 4 ; fixed-length store at the vscale-scaled address
361470 ret void
362471}
0 commit comments