@@ -205,6 +205,48 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %pt
205205 ret {<4 x i32 >, <4 x i32 >} %res1
206206}
207207
; Factor-2 interleaved vp.load whose mask is built with a shufflevector.
; Each of the four lanes of %m is repeated twice (0,0,1,1,2,2,3,3), giving an
; 8-lane mask for the <8 x i32> load with EVL 8. The even and odd lanes of the
; loaded vector are then split into two <4 x i32> results.
; Expected codegen is a single masked segment load (vlseg2e32.v ... v0.t).
; The result aggregate starts from poison rather than undef; both fields are
; overwritten by the insertvalue instructions, so the seed value is never read.
208+ define {<4 x i32 >, <4 x i32 >} @vpload_factor2_interleaved_mask_shuffle (ptr %ptr , <4 x i1 > %m ) {
209+ ; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle:
210+ ; CHECK: # %bb.0:
211+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
212+ ; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
213+ ; CHECK-NEXT: ret
214+ %interleaved.mask = shufflevector <4 x i1 > %m , <4 x i1 > poison, <8 x i32 > <i32 0 , i32 0 , i32 1 , i32 1 , i32 2 , i32 2 , i32 3 , i32 3 >
215+ %interleaved.vec = tail call <8 x i32 > @llvm.vp.load.v8i32.p0 (ptr %ptr , <8 x i1 > %interleaved.mask , i32 8 )
216+ %v0 = shufflevector <8 x i32 > %interleaved.vec , <8 x i32 > poison, <4 x i32 > <i32 0 , i32 2 , i32 4 , i32 6 >
217+ %v1 = shufflevector <8 x i32 > %interleaved.vec , <8 x i32 > poison, <4 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 >
218+ %res0 = insertvalue {<4 x i32 >, <4 x i32 >} poison , <4 x i32 > %v0 , 0
219+ %res1 = insertvalue {<4 x i32 >, <4 x i32 >} %res0 , <4 x i32 > %v1 , 1
220+ ret {<4 x i32 >, <4 x i32 >} %res1
221+ }
222+
; Variant of the shuffle-built-mask test where %m has only two lanes and the
; vp.load uses EVL 4 instead of 8. The mask shuffle still uses indices
; 0,0,1,1,2,2,3,3; indices 2 and 3 lie past the two lanes of %m and therefore
; select lanes of the poison second operand.
; The expected output below is NOT a segment load: the mask is materialized
; with integer vector ops, a masked unit-stride vle32.v is issued, and the two
; fields are extracted with vnsrl.
; NOTE(review): presumably the segment-load lowering is rejected or limited by
; the narrower mask source / EVL here -- confirm against the lowering code.
; The result aggregate starts from poison rather than undef; both fields are
; overwritten by the insertvalue instructions, so the seed value is never read.
223+ define {<4 x i32 >, <4 x i32 >} @vpload_factor2_interleaved_mask_shuffle2 (ptr %ptr , <2 x i1 > %m ) {
224+ ; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle2:
225+ ; CHECK: # %bb.0:
226+ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
227+ ; CHECK-NEXT: vmv.v.i v8, 0
228+ ; CHECK-NEXT: li a1, -1
229+ ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
230+ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
231+ ; CHECK-NEXT: vwaddu.vv v9, v8, v8
232+ ; CHECK-NEXT: vwmaccu.vx v9, a1, v8
233+ ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
234+ ; CHECK-NEXT: vmsne.vi v0, v9, 0
235+ ; CHECK-NEXT: vle32.v v10, (a0), v0.t
236+ ; CHECK-NEXT: li a0, 32
237+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
238+ ; CHECK-NEXT: vnsrl.wi v8, v10, 0
239+ ; CHECK-NEXT: vnsrl.wx v9, v10, a0
240+ ; CHECK-NEXT: ret
241+ %interleaved.mask = shufflevector <2 x i1 > %m , <2 x i1 > poison, <8 x i32 > <i32 0 , i32 0 , i32 1 , i32 1 , i32 2 , i32 2 , i32 3 , i32 3 >
242+ %interleaved.vec = tail call <8 x i32 > @llvm.vp.load.v8i32.p0 (ptr %ptr , <8 x i1 > %interleaved.mask , i32 4 )
243+ %v0 = shufflevector <8 x i32 > %interleaved.vec , <8 x i32 > poison, <4 x i32 > <i32 0 , i32 2 , i32 4 , i32 6 >
244+ %v1 = shufflevector <8 x i32 > %interleaved.vec , <8 x i32 > poison, <4 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 >
245+ %res0 = insertvalue {<4 x i32 >, <4 x i32 >} poison , <4 x i32 > %v0 , 0
246+ %res1 = insertvalue {<4 x i32 >, <4 x i32 >} %res0 , <4 x i32 > %v1 , 1
247+ ret {<4 x i32 >, <4 x i32 >} %res1
248+ }
249+
208250define {<4 x i32 >, <4 x i32 >, <4 x i32 >} @vpload_factor3 (ptr %ptr ) {
209251; CHECK-LABEL: vpload_factor3:
210252; CHECK: # %bb.0:
@@ -437,8 +479,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
437479; RV32-NEXT: li a2, 32
438480; RV32-NEXT: lui a3, 12
439481; RV32-NEXT: lui a6, 12291
440- ; RV32-NEXT: lui a7, %hi(.LCPI21_0 )
441- ; RV32-NEXT: addi a7, a7, %lo(.LCPI21_0 )
482+ ; RV32-NEXT: lui a7, %hi(.LCPI23_0 )
483+ ; RV32-NEXT: addi a7, a7, %lo(.LCPI23_0 )
442484; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
443485; RV32-NEXT: vle32.v v24, (a5)
444486; RV32-NEXT: vmv.s.x v0, a3
@@ -523,12 +565,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
523565; RV32-NEXT: addi a1, a1, 16
524566; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
525567; RV32-NEXT: lui a7, 49164
526- ; RV32-NEXT: lui a1, %hi(.LCPI21_1 )
527- ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_1 )
568+ ; RV32-NEXT: lui a1, %hi(.LCPI23_1 )
569+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_1 )
528570; RV32-NEXT: lui t2, 3
529571; RV32-NEXT: lui t1, 196656
530- ; RV32-NEXT: lui a4, %hi(.LCPI21_3 )
531- ; RV32-NEXT: addi a4, a4, %lo(.LCPI21_3 )
572+ ; RV32-NEXT: lui a4, %hi(.LCPI23_3 )
573+ ; RV32-NEXT: addi a4, a4, %lo(.LCPI23_3 )
532574; RV32-NEXT: lui t0, 786624
533575; RV32-NEXT: li a5, 48
534576; RV32-NEXT: lui a6, 768
@@ -707,8 +749,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
707749; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
708750; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
709751; RV32-NEXT: vrgatherei16.vv v24, v8, v2
710- ; RV32-NEXT: lui a1, %hi(.LCPI21_2 )
711- ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_2 )
752+ ; RV32-NEXT: lui a1, %hi(.LCPI23_2 )
753+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_2 )
712754; RV32-NEXT: lui a3, 3073
713755; RV32-NEXT: addi a3, a3, -1024
714756; RV32-NEXT: vmv.s.x v0, a3
@@ -772,16 +814,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
772814; RV32-NEXT: vrgatherei16.vv v28, v8, v3
773815; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
774816; RV32-NEXT: vmv.v.v v28, v24
775- ; RV32-NEXT: lui a1, %hi(.LCPI21_4 )
776- ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_4 )
777- ; RV32-NEXT: lui a2, %hi(.LCPI21_5 )
778- ; RV32-NEXT: addi a2, a2, %lo(.LCPI21_5 )
817+ ; RV32-NEXT: lui a1, %hi(.LCPI23_4 )
818+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_4 )
819+ ; RV32-NEXT: lui a2, %hi(.LCPI23_5 )
820+ ; RV32-NEXT: addi a2, a2, %lo(.LCPI23_5 )
779821; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
780822; RV32-NEXT: vle16.v v24, (a2)
781823; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
782824; RV32-NEXT: vle16.v v8, (a1)
783- ; RV32-NEXT: lui a1, %hi(.LCPI21_7 )
784- ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_7 )
825+ ; RV32-NEXT: lui a1, %hi(.LCPI23_7 )
826+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_7 )
785827; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
786828; RV32-NEXT: vle16.v v10, (a1)
787829; RV32-NEXT: csrr a1, vlenb
@@ -809,14 +851,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
809851; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
810852; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
811853; RV32-NEXT: vrgatherei16.vv v16, v0, v10
812- ; RV32-NEXT: lui a1, %hi(.LCPI21_6 )
813- ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_6 )
814- ; RV32-NEXT: lui a2, %hi(.LCPI21_8 )
815- ; RV32-NEXT: addi a2, a2, %lo(.LCPI21_8 )
854+ ; RV32-NEXT: lui a1, %hi(.LCPI23_6 )
855+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_6 )
856+ ; RV32-NEXT: lui a2, %hi(.LCPI23_8 )
857+ ; RV32-NEXT: addi a2, a2, %lo(.LCPI23_8 )
816858; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
817859; RV32-NEXT: vle16.v v4, (a1)
818- ; RV32-NEXT: lui a1, %hi(.LCPI21_9 )
819- ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_9 )
860+ ; RV32-NEXT: lui a1, %hi(.LCPI23_9 )
861+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI23_9 )
820862; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
821863; RV32-NEXT: vle16.v v6, (a1)
822864; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -903,8 +945,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
903945; RV64-NEXT: li a4, 128
904946; RV64-NEXT: lui a1, 1
905947; RV64-NEXT: vle64.v v8, (a3)
906- ; RV64-NEXT: lui a3, %hi(.LCPI21_0 )
907- ; RV64-NEXT: addi a3, a3, %lo(.LCPI21_0 )
948+ ; RV64-NEXT: lui a3, %hi(.LCPI23_0 )
949+ ; RV64-NEXT: addi a3, a3, %lo(.LCPI23_0 )
908950; RV64-NEXT: vmv.s.x v0, a4
909951; RV64-NEXT: csrr a4, vlenb
910952; RV64-NEXT: li a5, 61
@@ -1092,8 +1134,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
10921134; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
10931135; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
10941136; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
1095- ; RV64-NEXT: lui a2, %hi(.LCPI21_1 )
1096- ; RV64-NEXT: addi a2, a2, %lo(.LCPI21_1 )
1137+ ; RV64-NEXT: lui a2, %hi(.LCPI23_1 )
1138+ ; RV64-NEXT: addi a2, a2, %lo(.LCPI23_1 )
10971139; RV64-NEXT: li a3, 192
10981140; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
10991141; RV64-NEXT: vle16.v v6, (a2)
@@ -1127,8 +1169,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
11271169; RV64-NEXT: vrgatherei16.vv v24, v16, v6
11281170; RV64-NEXT: addi a2, sp, 16
11291171; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
1130- ; RV64-NEXT: lui a2, %hi(.LCPI21_2 )
1131- ; RV64-NEXT: addi a2, a2, %lo(.LCPI21_2 )
1172+ ; RV64-NEXT: lui a2, %hi(.LCPI23_2 )
1173+ ; RV64-NEXT: addi a2, a2, %lo(.LCPI23_2 )
11321174; RV64-NEXT: li a3, 1040
11331175; RV64-NEXT: vmv.s.x v0, a3
11341176; RV64-NEXT: addi a1, a1, -2016
@@ -1212,12 +1254,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
12121254; RV64-NEXT: add a1, sp, a1
12131255; RV64-NEXT: addi a1, a1, 16
12141256; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
1215- ; RV64-NEXT: lui a1, %hi(.LCPI21_3 )
1216- ; RV64-NEXT: addi a1, a1, %lo(.LCPI21_3 )
1257+ ; RV64-NEXT: lui a1, %hi(.LCPI23_3 )
1258+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI23_3 )
12171259; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
12181260; RV64-NEXT: vle16.v v20, (a1)
1219- ; RV64-NEXT: lui a1, %hi(.LCPI21_4 )
1220- ; RV64-NEXT: addi a1, a1, %lo(.LCPI21_4 )
1261+ ; RV64-NEXT: lui a1, %hi(.LCPI23_4 )
1262+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI23_4 )
12211263; RV64-NEXT: vle16.v v8, (a1)
12221264; RV64-NEXT: csrr a1, vlenb
12231265; RV64-NEXT: li a2, 77
@@ -1268,8 +1310,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
12681310; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
12691311; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
12701312; RV64-NEXT: vrgatherei16.vv v0, v16, v8
1271- ; RV64-NEXT: lui a1, %hi(.LCPI21_5 )
1272- ; RV64-NEXT: addi a1, a1, %lo(.LCPI21_5 )
1313+ ; RV64-NEXT: lui a1, %hi(.LCPI23_5 )
1314+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI23_5 )
12731315; RV64-NEXT: vle16.v v20, (a1)
12741316; RV64-NEXT: csrr a1, vlenb
12751317; RV64-NEXT: li a2, 61
@@ -1586,6 +1628,24 @@ define void @vpstore_factor7(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %
15861628 ret void
15871629}
15881630
; Factor-7 interleaved vp.store with a mask built by a shufflevector.
; The two lanes of %m are each repeated seven times (seven 0s, then seven 1s),
; giving a 14-lane mask. The seven <2 x i16> inputs are concatenated pairwise
; (%s0..%s5) and then interleaved so that the stored vector holds lane 0 of
; v0..v6 followed by lane 1 of v0..v6; it is stored with EVL 14.
; Expected codegen is a single masked segment store (vsseg7e16.v ... v0.t).
; Padding lanes in the %s4/%s5 shuffle masks are written as poison rather than
; undef; none of them is selected by the final interleave, which only uses
; indices 0..13.
1631+ define void @vpstore_factor7_masked (ptr %ptr , <2 x i16 > %v0 , <2 x i16 > %v1 , <2 x i16 > %v2 , <2 x i16 > %v3 , <2 x i16 > %v4 , <2 x i16 > %v5 , <2 x i16 > %v6 , <2 x i1 > %m ) {
1632+ ; CHECK-LABEL: vpstore_factor7_masked:
1633+ ; CHECK: # %bb.0:
1634+ ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
1635+ ; CHECK-NEXT: vsseg7e16.v v8, (a0), v0.t
1636+ ; CHECK-NEXT: ret
1637+ %interleaved.mask = shufflevector <2 x i1 > %m , <2 x i1 > poison, <14 x i32 > <i32 0 , i32 0 , i32 0 , i32 0 , i32 0 , i32 0 , i32 0 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 >
1638+ %s0 = shufflevector <2 x i16 > %v0 , <2 x i16 > %v1 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
1639+ %s1 = shufflevector <2 x i16 > %v2 , <2 x i16 > %v3 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
1640+ %s2 = shufflevector <2 x i16 > %v4 , <2 x i16 > %v5 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
1641+ %s3 = shufflevector <4 x i16 > %s0 , <4 x i16 > %s1 , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
1642+ %s4 = shufflevector <2 x i16 > %v6 , <2 x i16 > poison, <4 x i32 > <i32 0 , i32 1 , i32 poison , i32 poison >
1643+ %s5 = shufflevector <4 x i16 > %s2 , <4 x i16 > %s4 , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 poison , i32 poison >
1644+ %interleaved.vec = shufflevector <8 x i16 > %s3 , <8 x i16 > %s5 , <14 x i32 > <i32 0 , i32 2 , i32 4 , i32 6 , i32 8 , i32 10 , i32 12 , i32 1 , i32 3 , i32 5 , i32 7 , i32 9 , i32 11 , i32 13 >
1645+ tail call void @llvm.vp.store.v14i16.p0 (<14 x i16 > %interleaved.vec , ptr %ptr , <14 x i1 > %interleaved.mask , i32 14 )
1646+ ret void
1647+ }
1648+
15891649define void @vpstore_factor8 (ptr %ptr , <2 x i16 > %v0 , <2 x i16 > %v1 , <2 x i16 > %v2 , <2 x i16 > %v3 , <2 x i16 > %v4 , <2 x i16 > %v5 , <2 x i16 > %v6 , <2 x i16 > %v7 ) {
15901650; CHECK-LABEL: vpstore_factor8:
15911651; CHECK: # %bb.0:
@@ -1867,8 +1927,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
18671927; RV32-NEXT: vle32.v v12, (a0), v0.t
18681928; RV32-NEXT: li a0, 36
18691929; RV32-NEXT: vmv.s.x v20, a1
1870- ; RV32-NEXT: lui a1, %hi(.LCPI56_0 )
1871- ; RV32-NEXT: addi a1, a1, %lo(.LCPI56_0 )
1930+ ; RV32-NEXT: lui a1, %hi(.LCPI59_0 )
1931+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI59_0 )
18721932; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
18731933; RV32-NEXT: vle16.v v21, (a1)
18741934; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1943,8 +2003,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
19432003; RV32-NEXT: vmv.s.x v10, a0
19442004; RV32-NEXT: li a0, 146
19452005; RV32-NEXT: vmv.s.x v11, a0
1946- ; RV32-NEXT: lui a0, %hi(.LCPI57_0 )
1947- ; RV32-NEXT: addi a0, a0, %lo(.LCPI57_0 )
2006+ ; RV32-NEXT: lui a0, %hi(.LCPI60_0 )
2007+ ; RV32-NEXT: addi a0, a0, %lo(.LCPI60_0 )
19482008; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
19492009; RV32-NEXT: vle16.v v20, (a0)
19502010; RV32-NEXT: li a0, 36
0 commit comments