@@ -267,7 +267,7 @@ define <vscale x 32 x i16> @interleave4_nxv8i16(<vscale x 8 x i16> %vec0, <vscal
267267; SME2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
268268; SME2-NEXT: zip { z0.h - z3.h }, { z0.h - z3.h }
269269; SME2-NEXT: ret
270- %retval = call <vscale x 32 x i16 > @llvm.vector.interleave4.nxv8i16 (<vscale x 8 x i16 > %vec0 , <vscale x 8 x i16 > %vec1 , <vscale x 8 x i16 > %vec2 , <vscale x 8 x i16 > %vec3 )
270+ %retval = call <vscale x 32 x i16 > @llvm.vector.interleave4.nxv32i16 (<vscale x 8 x i16 > %vec0 , <vscale x 8 x i16 > %vec1 , <vscale x 8 x i16 > %vec2 , <vscale x 8 x i16 > %vec3 )
271271 ret <vscale x 32 x i16 > %retval
272272}
273273
@@ -540,6 +540,172 @@ define <vscale x 4 x i32> @interleave2_nxv2i32(<vscale x 2 x i32> %vec0, <vscale
540540 ret <vscale x 4 x i32 > %retval
541541}
542542
; Two-way interleave where both <vscale x 2 x i16> operands are the same
; constant splat (i16 3); the result type is <vscale x 4 x i16>.
; The checks below record, for both check prefixes, one splat materialisation
; (mov #3) followed by a zip of the register with itself and a uzp1.
; NOTE(review): with identical splat inputs the result is presumably itself a
; splat of 3, so these checks look like a baseline for a later fold - confirm.
543+ define <vscale x 4 x i16 > @interleave2_same_const_splat_nxv4i16 () {
544+ ; SVE-LABEL: interleave2_same_const_splat_nxv4i16:
545+ ; SVE: // %bb.0:
546+ ; SVE-NEXT: mov z0.d, #3 // =0x3
547+ ; SVE-NEXT: zip2 z1.d, z0.d, z0.d
548+ ; SVE-NEXT: zip1 z0.d, z0.d, z0.d
549+ ; SVE-NEXT: uzp1 z0.s, z0.s, z1.s
550+ ; SVE-NEXT: ret
551+ ;
552+ ; SME2-LABEL: interleave2_same_const_splat_nxv4i16:
553+ ; SME2: // %bb.0:
554+ ; SME2-NEXT: mov z0.d, #3 // =0x3
555+ ; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
556+ ; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
557+ ; SME2-NEXT: ret
558+ %retval = call <vscale x 4 x i16 > @llvm.vector.interleave2.nxv4i16 (<vscale x 2 x i16 > splat(i16 3 ), <vscale x 2 x i16 > splat(i16 3 ))
559+ ret <vscale x 4 x i16 > %retval
560+ }
561+
; Two-way interleave of two different constant splats (i16 3 and i16 4) of type
; <vscale x 2 x i16>; the result type is <vscale x 4 x i16>.
; The checks below record two splat materialisations (mov #4 and mov #3), a zip
; whose first source is the register holding 3, and a final uzp1.
; The intrinsic suffix names the scalable result type (nxv4i16), matching the
; declaration of @llvm.vector.interleave2.nxv4i16 and the sibling splat tests.
562+ define <vscale x 4 x i16 > @interleave2_diff_const_splat_nxv4i16 () {
563+ ; SVE-LABEL: interleave2_diff_const_splat_nxv4i16:
564+ ; SVE: // %bb.0:
565+ ; SVE-NEXT: mov z0.d, #4 // =0x4
566+ ; SVE-NEXT: mov z1.d, #3 // =0x3
567+ ; SVE-NEXT: zip2 z2.d, z1.d, z0.d
568+ ; SVE-NEXT: zip1 z0.d, z1.d, z0.d
569+ ; SVE-NEXT: uzp1 z0.s, z0.s, z2.s
570+ ; SVE-NEXT: ret
571+ ;
572+ ; SME2-LABEL: interleave2_diff_const_splat_nxv4i16:
573+ ; SME2: // %bb.0:
574+ ; SME2-NEXT: mov z0.d, #4 // =0x4
575+ ; SME2-NEXT: mov z1.d, #3 // =0x3
576+ ; SME2-NEXT: zip { z0.d, z1.d }, z1.d, z0.d
577+ ; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
578+ ; SME2-NEXT: ret
579+ %retval = call <vscale x 4 x i16 > @llvm.vector.interleave2.nxv4i16 (<vscale x 2 x i16 > splat(i16 3 ), <vscale x 2 x i16 > splat(i16 4 ))
580+ ret <vscale x 4 x i16 > %retval
581+ }
582+
; Two-way interleave where both operands are the same run-time splat of the
; scalar argument %a; the result type is <vscale x 4 x i16>.
; The splat is built with insertelement into lane 0 followed by a shufflevector
; with a zeroinitializer mask, and the single %splat value feeds both operands.
; The checks below record one broadcast from x0 (mov z0.d, x0), a zip of that
; register with itself, and a uzp1.
; NOTE(review): as with the constant case, the result is presumably just a
; splat of %a; the checks look like a pre-optimisation baseline - confirm.
583+ define <vscale x 4 x i16 > @interleave2_same_nonconst_splat_nxv4i16 (i16 %a ) {
584+ ; SVE-LABEL: interleave2_same_nonconst_splat_nxv4i16:
585+ ; SVE: // %bb.0:
586+ ; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
587+ ; SVE-NEXT: mov z0.d, x0
588+ ; SVE-NEXT: zip2 z1.d, z0.d, z0.d
589+ ; SVE-NEXT: zip1 z0.d, z0.d, z0.d
590+ ; SVE-NEXT: uzp1 z0.s, z0.s, z1.s
591+ ; SVE-NEXT: ret
592+ ;
593+ ; SME2-LABEL: interleave2_same_nonconst_splat_nxv4i16:
594+ ; SME2: // %bb.0:
595+ ; SME2-NEXT: // kill: def $w0 killed $w0 def $x0
596+ ; SME2-NEXT: mov z0.d, x0
597+ ; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
598+ ; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
599+ ; SME2-NEXT: ret
600+ %ins = insertelement <vscale x 2 x i16 > poison, i16 %a , i32 0
601+ %splat = shufflevector <vscale x 2 x i16 > %ins , <vscale x 2 x i16 > poison, <vscale x 2 x i32 > zeroinitializer
602+ %retval = call <vscale x 4 x i16 > @llvm.vector.interleave2.nxv4i16 (<vscale x 2 x i16 > %splat , <vscale x 2 x i16 > %splat )
603+ ret <vscale x 4 x i16 > %retval
604+ }
605+
; Two-way interleave of two different run-time splats: %splat1 broadcasts the
; scalar argument %a and %splat2 broadcasts %b; the result type is
; <vscale x 4 x i16>.
; Each splat is built with insertelement into lane 0 followed by a
; shufflevector with a zeroinitializer mask.
; The checks below record two broadcasts (from x0 and x1), a zip of the two
; registers in operand order, and a final uzp1.
606+ define <vscale x 4 x i16 > @interleave2_diff_nonconst_splat_nxv4i16 (i16 %a , i16 %b ) {
607+ ; SVE-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
608+ ; SVE: // %bb.0:
609+ ; SVE-NEXT: // kill: def $w1 killed $w1 def $x1
610+ ; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
611+ ; SVE-NEXT: mov z0.d, x0
612+ ; SVE-NEXT: mov z1.d, x1
613+ ; SVE-NEXT: zip2 z2.d, z0.d, z1.d
614+ ; SVE-NEXT: zip1 z0.d, z0.d, z1.d
615+ ; SVE-NEXT: uzp1 z0.s, z0.s, z2.s
616+ ; SVE-NEXT: ret
617+ ;
618+ ; SME2-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
619+ ; SME2: // %bb.0:
620+ ; SME2-NEXT: // kill: def $w1 killed $w1 def $x1
621+ ; SME2-NEXT: // kill: def $w0 killed $w0 def $x0
622+ ; SME2-NEXT: mov z0.d, x0
623+ ; SME2-NEXT: mov z1.d, x1
624+ ; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z1.d
625+ ; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
626+ ; SME2-NEXT: ret
627+ %ins1 = insertelement <vscale x 2 x i16 > poison, i16 %a , i32 0
628+ %splat1 = shufflevector <vscale x 2 x i16 > %ins1 , <vscale x 2 x i16 > poison, <vscale x 2 x i32 > zeroinitializer
629+ %ins2 = insertelement <vscale x 2 x i16 > poison, i16 %b , i32 0
630+ %splat2 = shufflevector <vscale x 2 x i16 > %ins2 , <vscale x 2 x i16 > poison, <vscale x 2 x i32 > zeroinitializer
631+ %retval = call <vscale x 4 x i16 > @llvm.vector.interleave2.nxv4i16 (<vscale x 2 x i16 > %splat1 , <vscale x 2 x i16 > %splat2 )
632+ ret <vscale x 4 x i16 > %retval
633+ }
634+
; Four-way interleave where all four <vscale x 2 x i16> operands are the same
; constant splat (i16 3); the result type is <vscale x 8 x i16>.
; Unlike the two-way tests above, the expectations are split over three check
; prefixes (SVE, SME-ALL and SME2-256). Only the SME2-256 sequence uses the
; four-register zip form; the other two build the result from two-register
; zips plus uzp1/uunpk steps.
; NOTE(review): the RUN lines that select these prefixes are outside this
; view, so which configurations map to which prefix should be confirmed there.
; NOTE(review): with four identical splat inputs the result is presumably a
; plain splat of 3; the long sequences look like a pre-fold baseline - confirm.
635+ define <vscale x 8 x i16 > @interleave4_same_const_splat_nxv8i16 () {
636+ ; SVE-LABEL: interleave4_same_const_splat_nxv8i16:
637+ ; SVE: // %bb.0:
638+ ; SVE-NEXT: mov z0.d, #3 // =0x3
639+ ; SVE-NEXT: zip1 z1.d, z0.d, z0.d
640+ ; SVE-NEXT: zip1 z2.d, z1.d, z1.d
641+ ; SVE-NEXT: zip2 z1.d, z1.d, z1.d
642+ ; SVE-NEXT: uzp1 z2.s, z2.s, z0.s
643+ ; SVE-NEXT: uzp1 z2.h, z2.h, z0.h
644+ ; SVE-NEXT: uunpklo z2.s, z2.h
645+ ; SVE-NEXT: uunpklo z2.d, z2.s
646+ ; SVE-NEXT: uzp1 z1.s, z2.s, z1.s
647+ ; SVE-NEXT: uzp1 z2.h, z1.h, z0.h
648+ ; SVE-NEXT: zip2 z0.d, z0.d, z0.d
649+ ; SVE-NEXT: uunpkhi z2.s, z2.h
650+ ; SVE-NEXT: zip1 z3.d, z0.d, z0.d
651+ ; SVE-NEXT: zip2 z0.d, z0.d, z0.d
652+ ; SVE-NEXT: uunpkhi z2.d, z2.s
653+ ; SVE-NEXT: uzp1 z2.s, z3.s, z2.s
654+ ; SVE-NEXT: uzp1 z2.h, z1.h, z2.h
655+ ; SVE-NEXT: uunpkhi z2.s, z2.h
656+ ; SVE-NEXT: uunpklo z2.d, z2.s
657+ ; SVE-NEXT: uzp1 z0.s, z2.s, z0.s
658+ ; SVE-NEXT: uzp1 z0.h, z1.h, z0.h
659+ ; SVE-NEXT: ret
660+ ;
661+ ; SME-ALL-LABEL: interleave4_same_const_splat_nxv8i16:
662+ ; SME-ALL: // %bb.0:
663+ ; SME-ALL-NEXT: mov z0.d, #3 // =0x3
664+ ; SME-ALL-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
665+ ; SME-ALL-NEXT: zip { z2.d, z3.d }, z0.d, z0.d
666+ ; SME-ALL-NEXT: uzp1 z4.s, z2.s, z0.s
667+ ; SME-ALL-NEXT: uzp1 z4.h, z4.h, z0.h
668+ ; SME-ALL-NEXT: uunpklo z4.s, z4.h
669+ ; SME-ALL-NEXT: uunpklo z4.d, z4.s
670+ ; SME-ALL-NEXT: uzp1 z2.s, z4.s, z3.s
671+ ; SME-ALL-NEXT: uzp1 z3.h, z2.h, z0.h
672+ ; SME-ALL-NEXT: zip { z0.d, z1.d }, z1.d, z1.d
673+ ; SME-ALL-NEXT: uunpkhi z3.s, z3.h
674+ ; SME-ALL-NEXT: uunpkhi z3.d, z3.s
675+ ; SME-ALL-NEXT: uzp1 z3.s, z0.s, z3.s
676+ ; SME-ALL-NEXT: uzp1 z3.h, z2.h, z3.h
677+ ; SME-ALL-NEXT: uunpkhi z3.s, z3.h
678+ ; SME-ALL-NEXT: uunpklo z3.d, z3.s
679+ ; SME-ALL-NEXT: uzp1 z0.s, z3.s, z1.s
680+ ; SME-ALL-NEXT: uzp1 z0.h, z2.h, z0.h
681+ ; SME-ALL-NEXT: ret
682+ ;
683+ ; SME2-256-LABEL: interleave4_same_const_splat_nxv8i16:
684+ ; SME2-256: // %bb.0:
685+ ; SME2-256-NEXT: mov z0.d, #3 // =0x3
686+ ; SME2-256-NEXT: mov z1.d, z0.d
687+ ; SME2-256-NEXT: mov z2.d, z0.d
688+ ; SME2-256-NEXT: mov z3.d, z0.d
689+ ; SME2-256-NEXT: zip { z0.d - z3.d }, { z0.d - z3.d }
690+ ; SME2-256-NEXT: uzp1 z4.s, z0.s, z0.s
691+ ; SME2-256-NEXT: uzp1 z4.h, z4.h, z0.h
692+ ; SME2-256-NEXT: uunpklo z4.s, z4.h
693+ ; SME2-256-NEXT: uunpklo z4.d, z4.s
694+ ; SME2-256-NEXT: uzp1 z4.s, z4.s, z1.s
695+ ; SME2-256-NEXT: uzp1 z5.h, z4.h, z0.h
696+ ; SME2-256-NEXT: uunpkhi z5.s, z5.h
697+ ; SME2-256-NEXT: uunpkhi z5.d, z5.s
698+ ; SME2-256-NEXT: uzp1 z5.s, z2.s, z5.s
699+ ; SME2-256-NEXT: uzp1 z5.h, z4.h, z5.h
700+ ; SME2-256-NEXT: uunpkhi z5.s, z5.h
701+ ; SME2-256-NEXT: uunpklo z5.d, z5.s
702+ ; SME2-256-NEXT: uzp1 z0.s, z5.s, z3.s
703+ ; SME2-256-NEXT: uzp1 z0.h, z4.h, z0.h
704+ ; SME2-256-NEXT: ret
705+ %retval = call <vscale x 8 x i16 > @llvm.vector.interleave4.nxv8i16 (<vscale x 2 x i16 > splat(i16 3 ), <vscale x 2 x i16 > splat(i16 3 ), <vscale x 2 x i16 > splat(i16 3 ), <vscale x 2 x i16 > splat(i16 3 ))
706+ ret <vscale x 8 x i16 > %retval
707+ }
708+
543709; Float declarations
544710declare <vscale x 4 x half > @llvm.vector.interleave2.nxv4f16 (<vscale x 2 x half >, <vscale x 2 x half >)
545711declare <vscale x 8 x half > @llvm.vector.interleave2.nxv8f16 (<vscale x 4 x half >, <vscale x 4 x half >)
@@ -567,3 +733,5 @@ declare <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64>,
567733declare <vscale x 16 x i8 > @llvm.vector.interleave2.nxv16i8 (<vscale x 8 x i8 >, <vscale x 8 x i8 >)
568734declare <vscale x 8 x i16 > @llvm.vector.interleave2.nxv8i16 (<vscale x 4 x i16 >, <vscale x 4 x i16 >)
569735declare <vscale x 4 x i32 > @llvm.vector.interleave2.nxv4i32 (<vscale x 2 x i32 >, <vscale x 2 x i32 >)
736+ declare <vscale x 4 x i16 > @llvm.vector.interleave2.nxv4i16 (<vscale x 2 x i16 >, <vscale x 2 x i16 >)
737+ declare <vscale x 8 x i16 > @llvm.vector.interleave4.nxv8i16 (<vscale x 2 x i16 >, <vscale x 2 x i16 >, <vscale x 2 x i16 >, <vscale x 2 x i16 >)
0 commit comments