Skip to content

Commit 7c37722

Browse files
preames and lukel97 authored
[IA] Recognize repeated masks which come from shuffle vectors (#150285)
This extends the fixed vector lowering to support the case where the mask is formed via shufflevector idiom. --------- Co-authored-by: Luke Lau <[email protected]>
1 parent 4e3266f commit 7c37722

File tree

2 files changed

+117
-36
lines changed

2 files changed

+117
-36
lines changed

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,27 @@ static Value *getMask(Value *WideMask, unsigned Factor,
587587
}
588588
}
589589

590+
if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
591+
// Check that the shuffle mask is: a) an interleave, b) all of the same
592+
// set of the elements, and c) contained by the first source. (c) could
593+
// be relaxed if desired.
594+
unsigned NumSrcElts =
595+
cast<FixedVectorType>(SVI->getOperand(1)->getType())->getNumElements();
596+
SmallVector<unsigned> StartIndexes;
597+
if (ShuffleVectorInst::isInterleaveMask(SVI->getShuffleMask(), Factor,
598+
NumSrcElts * 2, StartIndexes) &&
599+
llvm::all_of(StartIndexes, [](unsigned Start) { return Start == 0; }) &&
600+
llvm::all_of(SVI->getShuffleMask(), [&NumSrcElts](int Idx) {
601+
return Idx < (int)NumSrcElts;
602+
})) {
603+
auto *LeafMaskTy =
604+
VectorType::get(Type::getInt1Ty(SVI->getContext()), LeafValueEC);
605+
IRBuilder<> Builder(SVI);
606+
return Builder.CreateExtractVector(LeafMaskTy, SVI->getOperand(0),
607+
uint64_t(0));
608+
}
609+
}
610+
590611
return nullptr;
591612
}
592613

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 96 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,48 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %pt
205205
ret {<4 x i32>, <4 x i32>} %res1
206206
}
207207

208+
define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle(ptr %ptr, <4 x i1> %m) {
209+
; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle:
210+
; CHECK: # %bb.0:
211+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
212+
; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
213+
; CHECK-NEXT: ret
214+
%interleaved.mask = shufflevector <4 x i1> %m, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
215+
%interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
216+
%v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
217+
%v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
218+
%res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
219+
%res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
220+
ret {<4 x i32>, <4 x i32>} %res1
221+
}
222+
223+
define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle2(ptr %ptr, <2 x i1> %m) {
224+
; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle2:
225+
; CHECK: # %bb.0:
226+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
227+
; CHECK-NEXT: vmv.v.i v8, 0
228+
; CHECK-NEXT: li a1, -1
229+
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
230+
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
231+
; CHECK-NEXT: vwaddu.vv v9, v8, v8
232+
; CHECK-NEXT: vwmaccu.vx v9, a1, v8
233+
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
234+
; CHECK-NEXT: vmsne.vi v0, v9, 0
235+
; CHECK-NEXT: vle32.v v10, (a0), v0.t
236+
; CHECK-NEXT: li a0, 32
237+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
238+
; CHECK-NEXT: vnsrl.wi v8, v10, 0
239+
; CHECK-NEXT: vnsrl.wx v9, v10, a0
240+
; CHECK-NEXT: ret
241+
%interleaved.mask = shufflevector <2 x i1> %m, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
242+
%interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 4)
243+
%v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
244+
%v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
245+
%res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
246+
%res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
247+
ret {<4 x i32>, <4 x i32>} %res1
248+
}
249+
208250
define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) {
209251
; CHECK-LABEL: vpload_factor3:
210252
; CHECK: # %bb.0:
@@ -437,8 +479,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
437479
; RV32-NEXT: li a2, 32
438480
; RV32-NEXT: lui a3, 12
439481
; RV32-NEXT: lui a6, 12291
440-
; RV32-NEXT: lui a7, %hi(.LCPI21_0)
441-
; RV32-NEXT: addi a7, a7, %lo(.LCPI21_0)
482+
; RV32-NEXT: lui a7, %hi(.LCPI23_0)
483+
; RV32-NEXT: addi a7, a7, %lo(.LCPI23_0)
442484
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
443485
; RV32-NEXT: vle32.v v24, (a5)
444486
; RV32-NEXT: vmv.s.x v0, a3
@@ -523,12 +565,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
523565
; RV32-NEXT: addi a1, a1, 16
524566
; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
525567
; RV32-NEXT: lui a7, 49164
526-
; RV32-NEXT: lui a1, %hi(.LCPI21_1)
527-
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_1)
568+
; RV32-NEXT: lui a1, %hi(.LCPI23_1)
569+
; RV32-NEXT: addi a1, a1, %lo(.LCPI23_1)
528570
; RV32-NEXT: lui t2, 3
529571
; RV32-NEXT: lui t1, 196656
530-
; RV32-NEXT: lui a4, %hi(.LCPI21_3)
531-
; RV32-NEXT: addi a4, a4, %lo(.LCPI21_3)
572+
; RV32-NEXT: lui a4, %hi(.LCPI23_3)
573+
; RV32-NEXT: addi a4, a4, %lo(.LCPI23_3)
532574
; RV32-NEXT: lui t0, 786624
533575
; RV32-NEXT: li a5, 48
534576
; RV32-NEXT: lui a6, 768
@@ -707,8 +749,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
707749
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
708750
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
709751
; RV32-NEXT: vrgatherei16.vv v24, v8, v2
710-
; RV32-NEXT: lui a1, %hi(.LCPI21_2)
711-
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_2)
752+
; RV32-NEXT: lui a1, %hi(.LCPI23_2)
753+
; RV32-NEXT: addi a1, a1, %lo(.LCPI23_2)
712754
; RV32-NEXT: lui a3, 3073
713755
; RV32-NEXT: addi a3, a3, -1024
714756
; RV32-NEXT: vmv.s.x v0, a3
@@ -772,16 +814,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
772814
; RV32-NEXT: vrgatherei16.vv v28, v8, v3
773815
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
774816
; RV32-NEXT: vmv.v.v v28, v24
775-
; RV32-NEXT: lui a1, %hi(.LCPI21_4)
776-
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_4)
777-
; RV32-NEXT: lui a2, %hi(.LCPI21_5)
778-
; RV32-NEXT: addi a2, a2, %lo(.LCPI21_5)
817+
; RV32-NEXT: lui a1, %hi(.LCPI23_4)
818+
; RV32-NEXT: addi a1, a1, %lo(.LCPI23_4)
819+
; RV32-NEXT: lui a2, %hi(.LCPI23_5)
820+
; RV32-NEXT: addi a2, a2, %lo(.LCPI23_5)
779821
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
780822
; RV32-NEXT: vle16.v v24, (a2)
781823
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
782824
; RV32-NEXT: vle16.v v8, (a1)
783-
; RV32-NEXT: lui a1, %hi(.LCPI21_7)
784-
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_7)
825+
; RV32-NEXT: lui a1, %hi(.LCPI23_7)
826+
; RV32-NEXT: addi a1, a1, %lo(.LCPI23_7)
785827
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
786828
; RV32-NEXT: vle16.v v10, (a1)
787829
; RV32-NEXT: csrr a1, vlenb
@@ -809,14 +851,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
809851
; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
810852
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
811853
; RV32-NEXT: vrgatherei16.vv v16, v0, v10
812-
; RV32-NEXT: lui a1, %hi(.LCPI21_6)
813-
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_6)
814-
; RV32-NEXT: lui a2, %hi(.LCPI21_8)
815-
; RV32-NEXT: addi a2, a2, %lo(.LCPI21_8)
854+
; RV32-NEXT: lui a1, %hi(.LCPI23_6)
855+
; RV32-NEXT: addi a1, a1, %lo(.LCPI23_6)
856+
; RV32-NEXT: lui a2, %hi(.LCPI23_8)
857+
; RV32-NEXT: addi a2, a2, %lo(.LCPI23_8)
816858
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
817859
; RV32-NEXT: vle16.v v4, (a1)
818-
; RV32-NEXT: lui a1, %hi(.LCPI21_9)
819-
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_9)
860+
; RV32-NEXT: lui a1, %hi(.LCPI23_9)
861+
; RV32-NEXT: addi a1, a1, %lo(.LCPI23_9)
820862
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
821863
; RV32-NEXT: vle16.v v6, (a1)
822864
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -903,8 +945,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
903945
; RV64-NEXT: li a4, 128
904946
; RV64-NEXT: lui a1, 1
905947
; RV64-NEXT: vle64.v v8, (a3)
906-
; RV64-NEXT: lui a3, %hi(.LCPI21_0)
907-
; RV64-NEXT: addi a3, a3, %lo(.LCPI21_0)
948+
; RV64-NEXT: lui a3, %hi(.LCPI23_0)
949+
; RV64-NEXT: addi a3, a3, %lo(.LCPI23_0)
908950
; RV64-NEXT: vmv.s.x v0, a4
909951
; RV64-NEXT: csrr a4, vlenb
910952
; RV64-NEXT: li a5, 61
@@ -1092,8 +1134,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
10921134
; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
10931135
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
10941136
; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
1095-
; RV64-NEXT: lui a2, %hi(.LCPI21_1)
1096-
; RV64-NEXT: addi a2, a2, %lo(.LCPI21_1)
1137+
; RV64-NEXT: lui a2, %hi(.LCPI23_1)
1138+
; RV64-NEXT: addi a2, a2, %lo(.LCPI23_1)
10971139
; RV64-NEXT: li a3, 192
10981140
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
10991141
; RV64-NEXT: vle16.v v6, (a2)
@@ -1127,8 +1169,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
11271169
; RV64-NEXT: vrgatherei16.vv v24, v16, v6
11281170
; RV64-NEXT: addi a2, sp, 16
11291171
; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
1130-
; RV64-NEXT: lui a2, %hi(.LCPI21_2)
1131-
; RV64-NEXT: addi a2, a2, %lo(.LCPI21_2)
1172+
; RV64-NEXT: lui a2, %hi(.LCPI23_2)
1173+
; RV64-NEXT: addi a2, a2, %lo(.LCPI23_2)
11321174
; RV64-NEXT: li a3, 1040
11331175
; RV64-NEXT: vmv.s.x v0, a3
11341176
; RV64-NEXT: addi a1, a1, -2016
@@ -1212,12 +1254,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
12121254
; RV64-NEXT: add a1, sp, a1
12131255
; RV64-NEXT: addi a1, a1, 16
12141256
; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
1215-
; RV64-NEXT: lui a1, %hi(.LCPI21_3)
1216-
; RV64-NEXT: addi a1, a1, %lo(.LCPI21_3)
1257+
; RV64-NEXT: lui a1, %hi(.LCPI23_3)
1258+
; RV64-NEXT: addi a1, a1, %lo(.LCPI23_3)
12171259
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
12181260
; RV64-NEXT: vle16.v v20, (a1)
1219-
; RV64-NEXT: lui a1, %hi(.LCPI21_4)
1220-
; RV64-NEXT: addi a1, a1, %lo(.LCPI21_4)
1261+
; RV64-NEXT: lui a1, %hi(.LCPI23_4)
1262+
; RV64-NEXT: addi a1, a1, %lo(.LCPI23_4)
12211263
; RV64-NEXT: vle16.v v8, (a1)
12221264
; RV64-NEXT: csrr a1, vlenb
12231265
; RV64-NEXT: li a2, 77
@@ -1268,8 +1310,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
12681310
; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
12691311
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
12701312
; RV64-NEXT: vrgatherei16.vv v0, v16, v8
1271-
; RV64-NEXT: lui a1, %hi(.LCPI21_5)
1272-
; RV64-NEXT: addi a1, a1, %lo(.LCPI21_5)
1313+
; RV64-NEXT: lui a1, %hi(.LCPI23_5)
1314+
; RV64-NEXT: addi a1, a1, %lo(.LCPI23_5)
12731315
; RV64-NEXT: vle16.v v20, (a1)
12741316
; RV64-NEXT: csrr a1, vlenb
12751317
; RV64-NEXT: li a2, 61
@@ -1586,6 +1628,24 @@ define void @vpstore_factor7(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %
15861628
ret void
15871629
}
15881630

1631+
define void @vpstore_factor7_masked(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6, <2 x i1> %m) {
1632+
; CHECK-LABEL: vpstore_factor7_masked:
1633+
; CHECK: # %bb.0:
1634+
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1635+
; CHECK-NEXT: vsseg7e16.v v8, (a0), v0.t
1636+
; CHECK-NEXT: ret
1637+
%interleaved.mask = shufflevector <2 x i1> %m, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1638+
%s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1639+
%s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1640+
%s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1641+
%s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1642+
%s4 = shufflevector <2 x i16> %v6, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1643+
%s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef, i32 undef>
1644+
%interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <14 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
1645+
tail call void @llvm.vp.store.v14i16.p0(<14 x i16> %interleaved.vec, ptr %ptr, <14 x i1> %interleaved.mask, i32 14)
1646+
ret void
1647+
}
1648+
15891649
define void @vpstore_factor8(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6, <2 x i16> %v7) {
15901650
; CHECK-LABEL: vpstore_factor8:
15911651
; CHECK: # %bb.0:
@@ -1867,8 +1927,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
18671927
; RV32-NEXT: vle32.v v12, (a0), v0.t
18681928
; RV32-NEXT: li a0, 36
18691929
; RV32-NEXT: vmv.s.x v20, a1
1870-
; RV32-NEXT: lui a1, %hi(.LCPI56_0)
1871-
; RV32-NEXT: addi a1, a1, %lo(.LCPI56_0)
1930+
; RV32-NEXT: lui a1, %hi(.LCPI59_0)
1931+
; RV32-NEXT: addi a1, a1, %lo(.LCPI59_0)
18721932
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
18731933
; RV32-NEXT: vle16.v v21, (a1)
18741934
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1943,8 +2003,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
19432003
; RV32-NEXT: vmv.s.x v10, a0
19442004
; RV32-NEXT: li a0, 146
19452005
; RV32-NEXT: vmv.s.x v11, a0
1946-
; RV32-NEXT: lui a0, %hi(.LCPI57_0)
1947-
; RV32-NEXT: addi a0, a0, %lo(.LCPI57_0)
2006+
; RV32-NEXT: lui a0, %hi(.LCPI60_0)
2007+
; RV32-NEXT: addi a0, a0, %lo(.LCPI60_0)
19482008
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
19492009
; RV32-NEXT: vle16.v v20, (a0)
19502010
; RV32-NEXT: li a0, 36

0 commit comments

Comments (0)