@@ -387,3 +387,257 @@ if.then:
 if.else:
   ret void
 }
+
+; Different sizes / types
+
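+; Store of <vscale x 4 x i32>, load of <vscale x 16 x i8>: both types span the same
+; 16 * vscale bytes, so the stored value could in principle be forwarded as a
+; bitcast; for now the store/load pair is left in place (see CHECK lines).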
+define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v16i8_store_v4i32_forward_load(
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <vscale x 16 x i8>, ptr %p
+  ret <vscale x 16 x i8> %load
+}
+
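+; Same-size forwarding between integer and floating-point element types.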
+define <vscale x 4 x float> @load_v4f32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v4f32_store_v4i32_forward_load(
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <vscale x 4 x float>, ptr %p
+  ret <vscale x 4 x float> %load
+}
+
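+; Same-size forwarding from an i8 store to a float load.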
+define <vscale x 4 x float> @load_v4f32_store_v16i8_forward_load(ptr %p, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: @load_v4f32_store_v16i8_forward_load(
+; CHECK-NEXT:    store <vscale x 16 x i8> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
+;
+  store <vscale x 16 x i8> %x, ptr %p
+  %load = load <vscale x 4 x float>, ptr %p
+  ret <vscale x 4 x float> %load
+}
+
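+; Same-size forwarding from a float store to an integer load.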
+define <vscale x 4 x i32> @load_v4i32_store_v4f32_forward_load(ptr %p, <vscale x 4 x float> %x) {
+; CHECK-LABEL: @load_v4i32_store_v4f32_forward_load(
+; CHECK-NEXT:    store <vscale x 4 x float> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[LOAD]]
+;
+  store <vscale x 4 x float> %x, ptr %p
+  %load = load <vscale x 4 x i32>, ptr %p
+  ret <vscale x 4 x i32> %load
+}
+
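+; The <vscale x 4 x i32> load reads only the low half of the wider <vscale x 4 x i64> store.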
+define <vscale x 4 x i32> @load_v4i32_store_v4i64_forward_load(ptr %p, <vscale x 4 x i64> %x) {
+; CHECK-LABEL: @load_v4i32_store_v4i64_forward_load(
+; CHECK-NEXT:    store <vscale x 4 x i64> [[X:%.*]], ptr [[P:%.*]], align 32
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[LOAD]]
+;
+  store <vscale x 4 x i64> %x, ptr %p
+  %load = load <vscale x 4 x i32>, ptr %p
+  ret <vscale x 4 x i32> %load
+}
+
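+; The load is wider than the store, so it reads bytes the store never wrote.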
+define <vscale x 4 x i64> @load_v4i64_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v4i64_store_v4i32_forward_load(
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x i64>, ptr [[P]], align 32
+; CHECK-NEXT:    ret <vscale x 4 x i64> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <vscale x 4 x i64>, ptr %p
+  ret <vscale x 4 x i64> %load
+}
+
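+; Half-width load at the same base address; only the low half of the store is read.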
+define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load(
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[P]], align 8
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <vscale x 2 x i32>, ptr %p
+  ret <vscale x 2 x i32> %load
+}
+
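+; Load at a scalable offset (one <vscale x 2 x i32> step) into the stored range.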
+define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsets(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsets(
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[Q:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[P]], i64 1
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %q = getelementptr <vscale x 2 x i32>, ptr %p, i64 1
+  %load = load <vscale x 2 x i32>, ptr %q
+  ret <vscale x 2 x i32> %load
+}
+
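+; Load at a fixed offset (one <2 x i32> step, i.e. 8 bytes) into the scalable store.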
+define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsetc(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsetc(
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[Q:%.*]] = getelementptr <2 x i32>, ptr [[P]], i64 1
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %q = getelementptr <2 x i32>, ptr %p, i64 1
+  %load = load <vscale x 2 x i32>, ptr %q
+  ret <vscale x 2 x i32> %load
+}
+
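+; Pointer-vector load of an integer-vector store; forwarding would presumably
+; need an inttoptr of the stored value.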
+define <vscale x 2 x ptr> @load_v2p0_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v2p0_store_v4i32_forward_load(
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 2 x ptr>, ptr [[P]], align 16
+; CHECK-NEXT:    ret <vscale x 2 x ptr> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <vscale x 2 x ptr>, ptr %p
+  ret <vscale x 2 x ptr> %load
+}
+
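+; Integer-vector load of a pointer-vector store (the ptrtoint direction).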
+define <vscale x 2 x i64> @load_v2i64_store_v2p0_forward_load(ptr %p, <vscale x 2 x ptr> %x) {
+; CHECK-LABEL: @load_v2i64_store_v2p0_forward_load(
+; CHECK-NEXT:    store <vscale x 2 x ptr> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[P]], align 16
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[LOAD]]
+;
+  store <vscale x 2 x ptr> %x, ptr %p
+  %load = load <vscale x 2 x i64>, ptr %p
+  ret <vscale x 2 x i64> %load
+}
+
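+; Scalable load of a fixed-width store: the 16-byte store only covers the whole
+; load when vscale is 1.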
+define <vscale x 16 x i8> @load_nxv16i8_store_v4i32_forward_load(ptr %p, <4 x i32> %x) {
+; CHECK-LABEL: @load_nxv16i8_store_v4i32_forward_load(
+; CHECK-NEXT:    store <4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[LOAD]]
+;
+  store <4 x i32> %x, ptr %p
+  %load = load <vscale x 16 x i8>, ptr %p
+  ret <vscale x 16 x i8> %load
+}
+
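+; Fixed-width load of a scalable store: the stored range (16 * vscale bytes)
+; always covers the 16-byte load.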
+define <16 x i8> @load_v16i8_store_nxv4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v16i8_store_nxv4i32_forward_load(
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[P]], align 16
+; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <16 x i8>, ptr %p
+  ret <16 x i8> %load
+}
+
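+; The stored value is a constant splat, so a forwarded result would itself be a
+; constant.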
+define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_constant(ptr %p) {
+; CHECK-LABEL: @load_v16i8_store_v4i32_forward_constant(
+; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 4), ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[LOAD]]
+;
+  store <vscale x 4 x i32> splat (i32 4), ptr %p
+  %load = load <vscale x 16 x i8>, ptr %p
+  ret <vscale x 16 x i8> %load
+}
+
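+; The stored value is a single-element struct wrapping the scalable vector.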
+define <vscale x 16 x i8> @load_v16i8_struct_store_v4i32_forward_load(ptr %p, { <vscale x 4 x i32> } %x) {
+; CHECK-LABEL: @load_v16i8_struct_store_v4i32_forward_load(
+; CHECK-NEXT:    store { <vscale x 4 x i32> } [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[LOAD]]
+;
+  store { <vscale x 4 x i32> } %x, ptr %p
+  %load = load <vscale x 16 x i8>, ptr %p
+  ret <vscale x 16 x i8> %load
+}
+
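+; The loaded value is a single-element struct wrapping the scalable vector.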
+define { <vscale x 16 x i8> } @load_v16i8_store_v4i32_struct_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v16i8_store_v4i32_struct_forward_load(
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load { <vscale x 16 x i8> }, ptr [[P]], align 16
+; CHECK-NEXT:    ret { <vscale x 16 x i8> } [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load { <vscale x 16 x i8> }, ptr %p
+  ret { <vscale x 16 x i8> } %load
+}
+
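+; A larger pattern from aggregate copies: four <vscale x 4 x i32> fields are
+; spilled to an alloca at vscale-scaled offsets and reloaded as four
+; <vscale x 16 x i8> values; the redundant loads remain (see CHECK lines).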
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @bigexample({ <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a) vscale_range(1,16) {
+; CHECK-LABEL: @bigexample(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[REF_TMP:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull [[REF_TMP]])
+; CHECK-NEXT:    [[A_ELT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A:%.*]], 0
+; CHECK-NEXT:    store <vscale x 4 x i32> [[A_ELT]], ptr [[REF_TMP]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT:    [[REF_TMP_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP1]]
+; CHECK-NEXT:    [[A_ELT2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 1
+; CHECK-NEXT:    store <vscale x 4 x i32> [[A_ELT2]], ptr [[REF_TMP_REPACK1]], align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP0]], 5
+; CHECK-NEXT:    [[REF_TMP_REPACK3:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP3]]
+; CHECK-NEXT:    [[A_ELT4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 2
+; CHECK-NEXT:    store <vscale x 4 x i32> [[A_ELT4]], ptr [[REF_TMP_REPACK3]], align 16
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP0]], 48
+; CHECK-NEXT:    [[REF_TMP_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP5]]
+; CHECK-NEXT:    [[A_ELT6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 3
+; CHECK-NEXT:    store <vscale x 4 x i32> [[A_ELT6]], ptr [[REF_TMP_REPACK5]], align 16
+; CHECK-NEXT:    [[DOTUNPACK:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP]], align 16
+; CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[DOTUNPACK]], 0
+; CHECK-NEXT:    [[DOTUNPACK8:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK1]], align 16
+; CHECK-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]], <vscale x 16 x i8> [[DOTUNPACK8]], 1
+; CHECK-NEXT:    [[DOTUNPACK10:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK3]], align 16
+; CHECK-NEXT:    [[TMP12:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[DOTUNPACK10]], 2
+; CHECK-NEXT:    [[DOTUNPACK12:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK5]], align 16
+; CHECK-NEXT:    [[TMP15:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP12]], <vscale x 16 x i8> [[DOTUNPACK12]], 3
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull [[REF_TMP]])
+; CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP15]]
+;
+entry:
+  %ref.tmp = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+  call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull %ref.tmp)
+  %a.elt = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 0
+  store <vscale x 4 x i32> %a.elt, ptr %ref.tmp, align 16
+  %0 = call i64 @llvm.vscale.i64()
+  %1 = shl i64 %0, 4
+  %ref.tmp.repack1 = getelementptr inbounds i8, ptr %ref.tmp, i64 %1
+  %a.elt2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 1
+  store <vscale x 4 x i32> %a.elt2, ptr %ref.tmp.repack1, align 16
+  %2 = call i64 @llvm.vscale.i64()
+  %3 = shl i64 %2, 5
+  %ref.tmp.repack3 = getelementptr inbounds i8, ptr %ref.tmp, i64 %3
+  %a.elt4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 2
+  store <vscale x 4 x i32> %a.elt4, ptr %ref.tmp.repack3, align 16
+  %4 = call i64 @llvm.vscale.i64()
+  %5 = mul i64 %4, 48
+  %ref.tmp.repack5 = getelementptr inbounds i8, ptr %ref.tmp, i64 %5
+  %a.elt6 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 3
+  store <vscale x 4 x i32> %a.elt6, ptr %ref.tmp.repack5, align 16
+  %.unpack = load <vscale x 16 x i8>, ptr %ref.tmp, align 16
+  %6 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> %.unpack, 0
+  %7 = call i64 @llvm.vscale.i64()
+  %8 = shl i64 %7, 4
+  %.elt7 = getelementptr inbounds i8, ptr %ref.tmp, i64 %8
+  %.unpack8 = load <vscale x 16 x i8>, ptr %.elt7, align 16
+  %9 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, <vscale x 16 x i8> %.unpack8, 1
+  %10 = call i64 @llvm.vscale.i64()
+  %11 = shl i64 %10, 5
+  %.elt9 = getelementptr inbounds i8, ptr %ref.tmp, i64 %11
+  %.unpack10 = load <vscale x 16 x i8>, ptr %.elt9, align 16
+  %12 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %9, <vscale x 16 x i8> %.unpack10, 2
+  %13 = call i64 @llvm.vscale.i64()
+  %14 = mul i64 %13, 48
+  %.elt11 = getelementptr inbounds i8, ptr %ref.tmp, i64 %14
+  %.unpack12 = load <vscale x 16 x i8>, ptr %.elt11, align 16
+  %15 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %12, <vscale x 16 x i8> %.unpack12, 3
+  call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull %ref.tmp)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %15
+}