
Commit 87c2adb

[RISCV][IA] Precommit tests for deinterleaveN of masked.load
1 parent 7fd91bb commit 87c2adb

1 file changed: +161 -0 lines changed

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll

Lines changed: 161 additions & 0 deletions
@@ -538,3 +538,164 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
   %res7 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res6, <vscale x 8 x i8> %t7, 7
   ret { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res7
 }
+
+define {<vscale x 16 x i8>, <vscale x 16 x i8>} @masked_load_factor2(ptr %p) {
+; CHECK-LABEL: masked_load_factor2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl4r.v v12, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vnsrl.wi v10, v12, 8
+; CHECK-NEXT: ret
+  %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
+  %deinterleaved.results = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
+  ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %deinterleaved.results
+}
+
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4(ptr %p) {
+; CHECK-LABEL: masked_loat_factor4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: vl4r.v v8, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs4r.v v8, (a0)
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+  %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
+  %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
+  ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
+}
+
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_mask(ptr %p, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: masked_loat_factor4_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: add a3, a1, a2
+; CHECK-NEXT: vmv.v.v v9, v8
+; CHECK-NEXT: srli a4, a2, 2
+; CHECK-NEXT: vmv.v.v v10, v8
+; CHECK-NEXT: srli a5, a2, 3
+; CHECK-NEXT: vmv.v.v v11, v8
+; CHECK-NEXT: vsseg4e8.v v8, (a1)
+; CHECK-NEXT: vl1r.v v8, (a1)
+; CHECK-NEXT: add a1, a4, a5
+; CHECK-NEXT: vl1r.v v9, (a3)
+; CHECK-NEXT: add a3, a3, a2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: vl1r.v v10, (a3)
+; CHECK-NEXT: vl1r.v v11, (a2)
+; CHECK-NEXT: vmsne.vi v9, v9, 0
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: vmsne.vi v8, v10, 0
+; CHECK-NEXT: vmsne.vi v10, v11, 0
+; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v0, v9, a5
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v0, v8, a4
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v10, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs4r.v v8, (a0)
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+  %interleaved.mask = tail call <vscale x 32 x i1> @llvm.vector.interleave4.nxv32i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask)
+  %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> %interleaved.mask, <vscale x 32 x i8> poison)
+  %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
+  ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
+}
+
+; Negative test - some of the deinterleaved elements might come from the
+; passthru not the load
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_passthru(ptr %p, <vscale x 8 x i1> %mask, <vscale x 32 x i8> %passthru) {
+; CHECK-LABEL: masked_loat_factor4_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT: add a3, a1, a2
+; CHECK-NEXT: vmv.v.v v13, v12
+; CHECK-NEXT: srli a4, a2, 2
+; CHECK-NEXT: vmv.v.v v14, v12
+; CHECK-NEXT: srli a5, a2, 3
+; CHECK-NEXT: vmv.v.v v15, v12
+; CHECK-NEXT: vsseg4e8.v v12, (a1)
+; CHECK-NEXT: vl1r.v v12, (a1)
+; CHECK-NEXT: add a1, a4, a5
+; CHECK-NEXT: vl1r.v v13, (a3)
+; CHECK-NEXT: add a3, a3, a2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: vl1r.v v14, (a3)
+; CHECK-NEXT: vl1r.v v15, (a2)
+; CHECK-NEXT: vmsne.vi v13, v13, 0
+; CHECK-NEXT: vmsne.vi v0, v12, 0
+; CHECK-NEXT: vmsne.vi v12, v14, 0
+; CHECK-NEXT: vmsne.vi v14, v15, 0
+; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v0, v13, a5
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v0, v12, a4
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v0, v14, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs4r.v v8, (a0)
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+  %interleaved.mask = tail call <vscale x 32 x i1> @llvm.vector.interleave4.nxv32i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask)
+  %vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> %interleaved.mask, <vscale x 32 x i8> %passthru)
+  %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
+  ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
+}
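
For context (not part of the commit): the precommitted tests above exercise the IR pattern of a @llvm.masked.load whose result feeds @llvm.vector.deinterleaveN. A minimal sketch of the factor-2, all-true-mask form follows; the function and value names are illustrative only and do not appear in the diff.

; Illustrative sketch (assumed names): factor-2 deinterleave of an all-true
; masked load.  With every mask bit set, the masked.load behaves like a plain
; load of <vscale x 32 x i8>, so both fields returned by deinterleave2 come
; entirely from memory.
define {<vscale x 16 x i8>, <vscale x 16 x i8>} @sketch_factor2(ptr %p) {
  %wide = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
  %parts = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %wide)
  ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %parts
}

The passthru variant differs: per the masked.load semantics, lanes whose mask bit is clear take their value from the passthru operand rather than from memory, which is why the last test above is marked as a negative test.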
