@@ -205,6 +205,48 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %pt
   ret {<4 x i32>, <4 x i32>} %res1
 }
 
+define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle(ptr %ptr, <4 x i1> %m) {
+; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+  %interleaved.mask = shufflevector <4 x i1> %m, <4 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+  %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+  %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+  ret {<4 x i32>, <4 x i32>} %res1
+}
+
+define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle2(ptr %ptr, <2 x i1> %m) {
+; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vwaddu.vv v9, v8, v8
+; CHECK-NEXT: vwmaccu.vx v9, a1, v8
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; CHECK-NEXT: vmsne.vi v0, v9, 0
+; CHECK-NEXT: vle32.v v10, (a0), v0.t
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vnsrl.wx v9, v10, a0
+; CHECK-NEXT: ret
+  %interleaved.mask = shufflevector <2 x i1> %m, <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+  %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 4)
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+  %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+  ret {<4 x i32>, <4 x i32>} %res1
+}
+
 define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) {
 ; CHECK-LABEL: vpload_factor3:
 ; CHECK: # %bb.0:
@@ -437,8 +479,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: li a2, 32
 ; RV32-NEXT: lui a3, 12
 ; RV32-NEXT: lui a6, 12291
-; RV32-NEXT: lui a7, %hi(.LCPI21_0)
-; RV32-NEXT: addi a7, a7, %lo(.LCPI21_0)
+; RV32-NEXT: lui a7, %hi(.LCPI23_0)
+; RV32-NEXT: addi a7, a7, %lo(.LCPI23_0)
 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT: vle32.v v24, (a5)
 ; RV32-NEXT: vmv.s.x v0, a3
@@ -523,12 +565,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: addi a1, a1, 16
 ; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
 ; RV32-NEXT: lui a7, 49164
-; RV32-NEXT: lui a1, %hi(.LCPI21_1)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI21_1)
+; RV32-NEXT: lui a1, %hi(.LCPI23_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI23_1)
 ; RV32-NEXT: lui t2, 3
 ; RV32-NEXT: lui t1, 196656
-; RV32-NEXT: lui a4, %hi(.LCPI21_3)
-; RV32-NEXT: addi a4, a4, %lo(.LCPI21_3)
+; RV32-NEXT: lui a4, %hi(.LCPI23_3)
+; RV32-NEXT: addi a4, a4, %lo(.LCPI23_3)
 ; RV32-NEXT: lui t0, 786624
 ; RV32-NEXT: li a5, 48
 ; RV32-NEXT: lui a6, 768
@@ -707,8 +749,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT: vrgatherei16.vv v24, v8, v2
-; RV32-NEXT: lui a1, %hi(.LCPI21_2)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI21_2)
+; RV32-NEXT: lui a1, %hi(.LCPI23_2)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI23_2)
 ; RV32-NEXT: lui a3, 3073
 ; RV32-NEXT: addi a3, a3, -1024
 ; RV32-NEXT: vmv.s.x v0, a3
@@ -772,16 +814,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: vrgatherei16.vv v28, v8, v3
 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
 ; RV32-NEXT: vmv.v.v v28, v24
-; RV32-NEXT: lui a1, %hi(.LCPI21_4)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI21_4)
-; RV32-NEXT: lui a2, %hi(.LCPI21_5)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI21_5)
+; RV32-NEXT: lui a1, %hi(.LCPI23_4)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI23_4)
+; RV32-NEXT: lui a2, %hi(.LCPI23_5)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI23_5)
 ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT: vle16.v v24, (a2)
 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI21_7)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI21_7)
+; RV32-NEXT: lui a1, %hi(.LCPI23_7)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI23_7)
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vle16.v v10, (a1)
 ; RV32-NEXT: csrr a1, vlenb
@@ -809,14 +851,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vrgatherei16.vv v16, v0, v10
-; RV32-NEXT: lui a1, %hi(.LCPI21_6)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI21_6)
-; RV32-NEXT: lui a2, %hi(.LCPI21_8)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI21_8)
+; RV32-NEXT: lui a1, %hi(.LCPI23_6)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI23_6)
+; RV32-NEXT: lui a2, %hi(.LCPI23_8)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI23_8)
 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT: vle16.v v4, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI21_9)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI21_9)
+; RV32-NEXT: lui a1, %hi(.LCPI23_9)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI23_9)
 ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT: vle16.v v6, (a1)
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -903,8 +945,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: li a4, 128
 ; RV64-NEXT: lui a1, 1
 ; RV64-NEXT: vle64.v v8, (a3)
-; RV64-NEXT: lui a3, %hi(.LCPI21_0)
-; RV64-NEXT: addi a3, a3, %lo(.LCPI21_0)
+; RV64-NEXT: lui a3, %hi(.LCPI23_0)
+; RV64-NEXT: addi a3, a3, %lo(.LCPI23_0)
 ; RV64-NEXT: vmv.s.x v0, a4
 ; RV64-NEXT: csrr a4, vlenb
 ; RV64-NEXT: li a5, 61
@@ -1092,8 +1134,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
-; RV64-NEXT: lui a2, %hi(.LCPI21_1)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI21_1)
+; RV64-NEXT: lui a2, %hi(.LCPI23_1)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI23_1)
 ; RV64-NEXT: li a3, 192
 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT: vle16.v v6, (a2)
@@ -1127,8 +1169,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: vrgatherei16.vv v24, v16, v6
 ; RV64-NEXT: addi a2, sp, 16
 ; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT: lui a2, %hi(.LCPI21_2)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI21_2)
+; RV64-NEXT: lui a2, %hi(.LCPI23_2)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI23_2)
 ; RV64-NEXT: li a3, 1040
 ; RV64-NEXT: vmv.s.x v0, a3
 ; RV64-NEXT: addi a1, a1, -2016
@@ -1212,12 +1254,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: add a1, sp, a1
 ; RV64-NEXT: addi a1, a1, 16
 ; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT: lui a1, %hi(.LCPI21_3)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI21_3)
+; RV64-NEXT: lui a1, %hi(.LCPI23_3)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI23_3)
 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT: vle16.v v20, (a1)
-; RV64-NEXT: lui a1, %hi(.LCPI21_4)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI21_4)
+; RV64-NEXT: lui a1, %hi(.LCPI23_4)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI23_4)
 ; RV64-NEXT: vle16.v v8, (a1)
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: li a2, 77
@@ -1268,8 +1310,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vrgatherei16.vv v0, v16, v8
-; RV64-NEXT: lui a1, %hi(.LCPI21_5)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI21_5)
+; RV64-NEXT: lui a1, %hi(.LCPI23_5)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI23_5)
 ; RV64-NEXT: vle16.v v20, (a1)
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: li a2, 61
@@ -1586,6 +1628,24 @@ define void @vpstore_factor7(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %
   ret void
 }
 
+define void @vpstore_factor7_masked(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6, <2 x i1> %m) {
+; CHECK-LABEL: vpstore_factor7_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsseg7e16.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+  %interleaved.mask = shufflevector <2 x i1> %m, <2 x i1> poison, <14 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s4 = shufflevector <2 x i16> %v6, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <14 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
+  tail call void @llvm.vp.store.v14i16.p0(<14 x i16> %interleaved.vec, ptr %ptr, <14 x i1> %interleaved.mask, i32 14)
+  ret void
+}
+
 define void @vpstore_factor8(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6, <2 x i16> %v7) {
 ; CHECK-LABEL: vpstore_factor8:
 ; CHECK: # %bb.0:
@@ -1867,8 +1927,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
 ; RV32-NEXT: vle32.v v12, (a0), v0.t
 ; RV32-NEXT: li a0, 36
 ; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI56_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI56_0)
+; RV32-NEXT: lui a1, %hi(.LCPI59_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI59_0)
 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT: vle16.v v21, (a1)
 ; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1943,8 +2003,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
 ; RV32-NEXT: vmv.s.x v10, a0
 ; RV32-NEXT: li a0, 146
 ; RV32-NEXT: vmv.s.x v11, a0
-; RV32-NEXT: lui a0, %hi(.LCPI57_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI57_0)
+; RV32-NEXT: lui a0, %hi(.LCPI60_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI60_0)
 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT: vle16.v v20, (a0)
 ; RV32-NEXT: li a0, 36