Skip to content

Commit b3a8d0e

Browse files
committed
[RISCV] Add additional coverage for one hot interleave load cases [nfc]
Add coverage for fixed-vector vp.load and for the deinterleave intrinsic paths.
1 parent 0dae924 commit b3a8d0e

File tree

2 files changed

+71
-4
lines changed

2 files changed

+71
-4
lines changed

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1718,6 +1718,28 @@ define void @load_factor4_one_active_storeback_full(ptr %ptr) {
17181718
ret void
17191719
}
17201720

1721+
; Factor-3 interleaved load written as a fixed-vector vp.load of <12 x i32>
; with an all-true mask and an EVL of 12. Only field 0 (elements 0, 3, 6, 9)
; is extracted by the shufflevector and returned; the other two fields are
; unused. The expected codegen is a single vlseg3e32.v into v8 with VL = 4.
define <4 x i32> @vp_load_factor3_one_active(ptr %ptr) {
1722+
; CHECK-LABEL: vp_load_factor3_one_active:
1723+
; CHECK: # %bb.0:
1724+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1725+
; CHECK-NEXT: vlseg3e32.v v8, (a0)
1726+
; CHECK-NEXT: ret
1727+
%interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12)
1728+
%v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
1729+
ret <4 x i32> %v0
1730+
}
1731+
1732+
; Factor-5 interleaved load written as a fixed-vector vp.load of <20 x i32>
; with an all-true mask and an EVL of 20. Only field 0 (elements 0, 5, 10, 15)
; is extracted by the shufflevector and returned; the other four fields are
; unused. The expected codegen is a single vlseg5e32.v into v8 with VL = 4.
define <4 x i32> @vp_load_factor5_one_active(ptr %ptr) {
1733+
; CHECK-LABEL: vp_load_factor5_one_active:
1734+
; CHECK: # %bb.0:
1735+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1736+
; CHECK-NEXT: vlseg5e32.v v8, (a0)
1737+
; CHECK-NEXT: ret
1738+
%interleaved.vec = tail call <20 x i32> @llvm.vp.load.v20i32.p0(ptr %ptr, <20 x i1> splat (i1 true), i32 20)
1739+
%v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
1740+
ret <4 x i32> %v0
1741+
}
1742+
17211743
define void @store_factor4_one_active(ptr %ptr, <4 x i32> %v) {
17221744
; CHECK-LABEL: store_factor4_one_active:
17231745
; CHECK: # %bb.0:
@@ -1804,8 +1826,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
18041826
; RV32-NEXT: vle32.v v12, (a0), v0.t
18051827
; RV32-NEXT: li a0, 36
18061828
; RV32-NEXT: vmv.s.x v20, a1
1807-
; RV32-NEXT: lui a1, %hi(.LCPI51_0)
1808-
; RV32-NEXT: addi a1, a1, %lo(.LCPI51_0)
1829+
; RV32-NEXT: lui a1, %hi(.LCPI53_0)
1830+
; RV32-NEXT: addi a1, a1, %lo(.LCPI53_0)
18091831
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
18101832
; RV32-NEXT: vle16.v v21, (a1)
18111833
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1880,8 +1902,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
18801902
; RV32-NEXT: vmv.s.x v10, a0
18811903
; RV32-NEXT: li a0, 146
18821904
; RV32-NEXT: vmv.s.x v11, a0
1883-
; RV32-NEXT: lui a0, %hi(.LCPI52_0)
1884-
; RV32-NEXT: addi a0, a0, %lo(.LCPI52_0)
1905+
; RV32-NEXT: lui a0, %hi(.LCPI54_0)
1906+
; RV32-NEXT: addi a0, a0, %lo(.LCPI54_0)
18851907
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
18861908
; RV32-NEXT: vle16.v v20, (a0)
18871909
; RV32-NEXT: li a0, 36

llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,51 @@ define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32>
648648
ret void
649649
}
650650

651+
; Scalable-vector vp.load of <vscale x 4 x i32> (all-true mask) fed into
; llvm.vector.deinterleave2, where only result field 1 is extracted and
; returned. The expected codegen is a vlseg2e32.v whose destination group
; starts at v7; presumably this places field 1 in v8, the register used for
; the return value -- confirm against the RVV segment-load register numbering.
; NOTE(review): %rvl is computed as %evl * 4 although the deinterleave factor
; is 2, so the expected VL is (a1 << 2) >> 1 rather than a1. A multiplier of 2
; may have been intended; changing it requires regenerating the check lines.
define <vscale x 2 x i32> @load_factor2_oneactive(ptr %ptr, i32 %evl) {
652+
; RV32-LABEL: load_factor2_oneactive:
653+
; RV32: # %bb.0:
654+
; RV32-NEXT: slli a1, a1, 2
655+
; RV32-NEXT: srli a1, a1, 1
656+
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
657+
; RV32-NEXT: vlseg2e32.v v7, (a0)
658+
; RV32-NEXT: ret
659+
;
660+
; RV64-LABEL: load_factor2_oneactive:
661+
; RV64: # %bb.0:
662+
; RV64-NEXT: slli a1, a1, 34
663+
; RV64-NEXT: srli a1, a1, 33
664+
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
665+
; RV64-NEXT: vlseg2e32.v v7, (a0)
666+
; RV64-NEXT: ret
667+
%rvl = mul nuw i32 %evl, 4
668+
%wide.masked.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %rvl)
669+
%deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %wide.masked.load)
670+
%t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
671+
ret <vscale x 2 x i32> %t0
672+
}
673+
674+
; Scalable-vector vp.load of <vscale x 10 x i32> (all-true mask, EVL = %evl * 5)
; fed into llvm.vector.deinterleave5, where only result field 3 is extracted
; and returned. The expected codegen is a vlseg5e32.v whose destination group
; starts at v5; presumably this places field 3 in v8, the register used for
; the return value -- confirm against the RVV segment-load register numbering.
; On RV32 the incoming a1 is used directly as VL; on RV64 its upper 32 bits
; are cleared first with an slli/srli pair.
define <vscale x 2 x i32> @load_factor5_oneactive(ptr %ptr, i32 %evl) {
675+
; RV32-LABEL: load_factor5_oneactive:
676+
; RV32: # %bb.0:
677+
; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
678+
; RV32-NEXT: vlseg5e32.v v5, (a0)
679+
; RV32-NEXT: ret
680+
;
681+
; RV64-LABEL: load_factor5_oneactive:
682+
; RV64: # %bb.0:
683+
; RV64-NEXT: slli a1, a1, 32
684+
; RV64-NEXT: srli a1, a1, 32
685+
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
686+
; RV64-NEXT: vlseg5e32.v v5, (a0)
687+
; RV64-NEXT: ret
688+
%rvl = mul nuw i32 %evl, 5
689+
%wide.masked.load = call <vscale x 10 x i32> @llvm.vp.load(ptr %ptr, <vscale x 10 x i1> splat (i1 true), i32 %rvl)
690+
%deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave5(<vscale x 10 x i32> %wide.masked.load)
691+
%t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 3
692+
ret <vscale x 2 x i32> %t3
693+
}
694+
695+
651696
; Negative tests
652697

653698
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1> %mask0, <vscale x 2 x i1> %mask1, ptr %ptr, i32 %evl) {

0 commit comments

Comments
 (0)