@@ -666,5 +666,91 @@ define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @not_scalable_vectors(ptr %p
666666 ret { <2 x i32 >, <2 x i32 >, <2 x i32 >, <2 x i32 > } %res3
667667}
668668
; Negative test: the two deinterleaved mask halves come from different values
; (%mask0 vs %mask1), so the interleaved masked vp.load cannot be matched to a
; segmented load; codegen instead materializes the interleaved mask
; (vwaddu/vwmaccu of the widened mask bits), does a plain masked vle32.v, and
; deinterleaves the result with vnsrl.
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1> %mask0, <vscale x 2 x i1> %mask1, ptr %ptr, i32 %rvl) {
; RV32-LABEL: not_same_mask:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmv1r.v v9, v0
; RV32-NEXT:    vmv1r.v v0, v8
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    li a2, -1
; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.i v10, 0
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmerge.vim v11, v8, 1, v0
; RV32-NEXT:    vmv1r.v v0, v9
; RV32-NEXT:    vmerge.vim v9, v8, 1, v0
; RV32-NEXT:    srli a3, a3, 2
; RV32-NEXT:    vwaddu.vv v12, v9, v11
; RV32-NEXT:    vwmaccu.vx v12, a2, v11
; RV32-NEXT:    vmsne.vi v0, v12, 0
; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
; RV32-NEXT:    vslidedown.vx v9, v12, a3
; RV32-NEXT:    vmerge.vim v10, v10, 1, v0
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmsne.vi v0, v9, 0
; RV32-NEXT:    add a2, a3, a3
; RV32-NEXT:    vmerge.vim v8, v8, 1, v0
; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT:    vslideup.vx v10, v8, a3
; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v10, (a0), v0.t
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wx v9, v10, a0
; RV32-NEXT:    vnsrl.wi v8, v10, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: not_same_mask:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT:    vmv1r.v v9, v0
; RV64-NEXT:    vmv1r.v v0, v8
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    li a2, -1
; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.i v10, 0
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a1, a1, 32
; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT:    vmerge.vim v11, v8, 1, v0
; RV64-NEXT:    vmv1r.v v0, v9
; RV64-NEXT:    vmerge.vim v9, v8, 1, v0
; RV64-NEXT:    srli a3, a3, 2
; RV64-NEXT:    vwaddu.vv v12, v9, v11
; RV64-NEXT:    vwmaccu.vx v12, a2, v11
; RV64-NEXT:    vmsne.vi v0, v12, 0
; RV64-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
; RV64-NEXT:    vslidedown.vx v9, v12, a3
; RV64-NEXT:    vmerge.vim v10, v10, 1, v0
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT:    vmsne.vi v0, v9, 0
; RV64-NEXT:    add a2, a3, a3
; RV64-NEXT:    vmerge.vim v8, v8, 1, v0
; RV64-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
; RV64-NEXT:    vslideup.vx v10, v8, a3
; RV64-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    srli a1, a1, 32
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v10, (a0), v0.t
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wx v9, v10, a0
; RV64-NEXT:    vnsrl.wi v8, v10, 0
; RV64-NEXT:    ret
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask0, <vscale x 2 x i1> %mask1)
  %wide.masked.load = tail call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
  ; Mangling fixed: the overload suffix is the source vector type
  ; <vscale x 4 x i32> (nxv4i32), not nxv16i32 as originally written.
  %deinterleaved.results = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %wide.masked.load)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, <vscale x 2 x i32> %t0, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
}
754+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
0 commit comments