@@ -190,6 +190,20 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2(ptr %ptr) {
   ret {<4 x i32>, <4 x i32>} %res1
 }

+define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i1> %m) {
+; CHECK-LABEL: vpload_factor2_interleaved_mask_intrinsic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+  %interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m)
+  %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+  %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+  ret {<4 x i32>, <4 x i32>} %res1
+}

 define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) {
 ; CHECK-LABEL: vpload_factor3:
@@ -423,8 +437,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: li a2, 32
 ; RV32-NEXT: lui a3, 12
 ; RV32-NEXT: lui a6, 12291
-; RV32-NEXT: lui a7, %hi(.LCPI20_0)
-; RV32-NEXT: addi a7, a7, %lo(.LCPI20_0)
+; RV32-NEXT: lui a7, %hi(.LCPI21_0)
+; RV32-NEXT: addi a7, a7, %lo(.LCPI21_0)
 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT: vle32.v v24, (a5)
 ; RV32-NEXT: vmv.s.x v0, a3
@@ -509,12 +523,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
 ; RV32-NEXT: lui a7, 49164
-; RV32-NEXT: lui a1, %hi(.LCPI20_1)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_1)
+; RV32-NEXT: lui a1, %hi(.LCPI21_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_1)
 ; RV32-NEXT: lui t2, 3
 ; RV32-NEXT: lui t1, 196656
-; RV32-NEXT: lui a4, %hi(.LCPI20_3)
-; RV32-NEXT: addi a4, a4, %lo(.LCPI20_3)
+; RV32-NEXT: lui a4, %hi(.LCPI21_3)
+; RV32-NEXT: addi a4, a4, %lo(.LCPI21_3)
 ; RV32-NEXT: lui t0, 786624
 ; RV32-NEXT: li a5, 48
 ; RV32-NEXT: lui a6, 768
@@ -693,8 +707,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT: vrgatherei16.vv v24, v8, v2
-; RV32-NEXT: lui a1, %hi(.LCPI20_2)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_2)
+; RV32-NEXT: lui a1, %hi(.LCPI21_2)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_2)
 ; RV32-NEXT: lui a3, 3073
 ; RV32-NEXT: addi a3, a3, -1024
 ; RV32-NEXT: vmv.s.x v0, a3
@@ -758,16 +772,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: vrgatherei16.vv v28, v8, v3
 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
 ; RV32-NEXT: vmv.v.v v28, v24
-; RV32-NEXT: lui a1, %hi(.LCPI20_4)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_4)
-; RV32-NEXT: lui a2, %hi(.LCPI20_5)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI20_5)
+; RV32-NEXT: lui a1, %hi(.LCPI21_4)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_4)
+; RV32-NEXT: lui a2, %hi(.LCPI21_5)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI21_5)
 ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT: vle16.v v24, (a2)
 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI20_7)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_7)
+; RV32-NEXT: lui a1, %hi(.LCPI21_7)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_7)
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vle16.v v10, (a1)
 ; RV32-NEXT: csrr a1, vlenb
@@ -795,14 +809,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vrgatherei16.vv v16, v0, v10
-; RV32-NEXT: lui a1, %hi(.LCPI20_6)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_6)
-; RV32-NEXT: lui a2, %hi(.LCPI20_8)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI20_8)
+; RV32-NEXT: lui a1, %hi(.LCPI21_6)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_6)
+; RV32-NEXT: lui a2, %hi(.LCPI21_8)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI21_8)
 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT: vle16.v v4, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI20_9)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_9)
+; RV32-NEXT: lui a1, %hi(.LCPI21_9)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_9)
 ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT: vle16.v v6, (a1)
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -889,8 +903,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: li a4, 128
 ; RV64-NEXT: lui a1, 1
 ; RV64-NEXT: vle64.v v8, (a3)
-; RV64-NEXT: lui a3, %hi(.LCPI20_0)
-; RV64-NEXT: addi a3, a3, %lo(.LCPI20_0)
+; RV64-NEXT: lui a3, %hi(.LCPI21_0)
+; RV64-NEXT: addi a3, a3, %lo(.LCPI21_0)
 ; RV64-NEXT: vmv.s.x v0, a4
 ; RV64-NEXT: csrr a4, vlenb
 ; RV64-NEXT: li a5, 61
@@ -1078,8 +1092,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
-; RV64-NEXT: lui a2, %hi(.LCPI20_1)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI20_1)
+; RV64-NEXT: lui a2, %hi(.LCPI21_1)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI21_1)
 ; RV64-NEXT: li a3, 192
 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT: vle16.v v6, (a2)
@@ -1113,8 +1127,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: vrgatherei16.vv v24, v16, v6
 ; RV64-NEXT: addi a2, sp, 16
 ; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT: lui a2, %hi(.LCPI20_2)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI20_2)
+; RV64-NEXT: lui a2, %hi(.LCPI21_2)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI21_2)
 ; RV64-NEXT: li a3, 1040
 ; RV64-NEXT: vmv.s.x v0, a3
 ; RV64-NEXT: addi a1, a1, -2016
@@ -1198,12 +1212,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
 ; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT: lui a1, %hi(.LCPI20_3)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI20_3)
+; RV64-NEXT: lui a1, %hi(.LCPI21_3)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI21_3)
 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT: vle16.v v20, (a1)
-; RV64-NEXT: lui a1, %hi(.LCPI20_4)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI20_4)
+; RV64-NEXT: lui a1, %hi(.LCPI21_4)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI21_4)
 ; RV64-NEXT: vle16.v v8, (a1)
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: li a2, 77
@@ -1254,8 +1268,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vrgatherei16.vv v0, v16, v8
-; RV64-NEXT: lui a1, %hi(.LCPI20_5)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI20_5)
+; RV64-NEXT: lui a1, %hi(.LCPI21_5)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI21_5)
 ; RV64-NEXT: vle16.v v20, (a1)
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: li a2, 61
@@ -1472,6 +1486,19 @@ define void @vpstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
   ret void
 }

+define void @vpstore_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i1> %m) {
+; CHECK-LABEL: vpstore_factor2_interleaved_mask_intrinsic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsseg2e32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+  %interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m)
+  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  tail call void @llvm.vp.store.v8i32.p0(<8 x i32> %interleaved.vec, ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+  ret void
+}
+
+
 define void @vpstore_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
 ; CHECK-LABEL: vpstore_factor3:
 ; CHECK: # %bb.0:
@@ -1839,8 +1866,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
 ; RV32-NEXT: vle32.v v12, (a0), v0.t
 ; RV32-NEXT: li a0, 36
 ; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI54_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI54_0)
+; RV32-NEXT: lui a1, %hi(.LCPI56_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI56_0)
 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT: vle16.v v21, (a1)
 ; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1915,8 +1942,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
 ; RV32-NEXT: vmv.s.x v10, a0
 ; RV32-NEXT: li a0, 146
 ; RV32-NEXT: vmv.s.x v11, a0
-; RV32-NEXT: lui a0, %hi(.LCPI55_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0)
+; RV32-NEXT: lui a0, %hi(.LCPI57_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI57_0)
 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT: vle16.v v20, (a0)
 ; RV32-NEXT: li a0, 36