@@ -190,6 +190,20 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2(ptr %ptr) {
  ret {<4 x i32>, <4 x i32>} %res1
}

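+ ; The wide mask here is built by the llvm.vector.interleave2 intrinsic rather
+ ; than a shufflevector; the masked vp.load below still folds into a masked
+ ; vlseg2.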
+ define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i1> %m) {
+ ; CHECK-LABEL: vpload_factor2_interleaved_mask_intrinsic:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+ ; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
+ ; CHECK-NEXT: ret
+   %interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m)
+   %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+   %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+   %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+   %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+   %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+   ret {<4 x i32>, <4 x i32>} %res1
+ }

define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) {
; CHECK-LABEL: vpload_factor3:
@@ -423,8 +437,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: li a2, 32
; RV32-NEXT: lui a3, 12
; RV32-NEXT: lui a6, 12291
- ; RV32-NEXT: lui a7, %hi(.LCPI20_0)
- ; RV32-NEXT: addi a7, a7, %lo(.LCPI20_0)
+ ; RV32-NEXT: lui a7, %hi(.LCPI21_0)
+ ; RV32-NEXT: addi a7, a7, %lo(.LCPI21_0)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a5)
; RV32-NEXT: vmv.s.x v0, a3
@@ -509,12 +523,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
; RV32-NEXT: lui a7, 49164
- ; RV32-NEXT: lui a1, %hi(.LCPI20_1)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI20_1)
+ ; RV32-NEXT: lui a1, %hi(.LCPI21_1)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_1)
; RV32-NEXT: lui t2, 3
; RV32-NEXT: lui t1, 196656
- ; RV32-NEXT: lui a4, %hi(.LCPI20_3)
- ; RV32-NEXT: addi a4, a4, %lo(.LCPI20_3)
+ ; RV32-NEXT: lui a4, %hi(.LCPI21_3)
+ ; RV32-NEXT: addi a4, a4, %lo(.LCPI21_3)
; RV32-NEXT: lui t0, 786624
; RV32-NEXT: li a5, 48
; RV32-NEXT: lui a6, 768
@@ -693,8 +707,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v24, v8, v2
- ; RV32-NEXT: lui a1, %hi(.LCPI20_2)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI20_2)
+ ; RV32-NEXT: lui a1, %hi(.LCPI21_2)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_2)
; RV32-NEXT: lui a3, 3073
; RV32-NEXT: addi a3, a3, -1024
; RV32-NEXT: vmv.s.x v0, a3
@@ -758,16 +772,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vrgatherei16.vv v28, v8, v3
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v28, v24
- ; RV32-NEXT: lui a1, %hi(.LCPI20_4)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI20_4)
- ; RV32-NEXT: lui a2, %hi(.LCPI20_5)
- ; RV32-NEXT: addi a2, a2, %lo(.LCPI20_5)
+ ; RV32-NEXT: lui a1, %hi(.LCPI21_4)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_4)
+ ; RV32-NEXT: lui a2, %hi(.LCPI21_5)
+ ; RV32-NEXT: addi a2, a2, %lo(.LCPI21_5)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v24, (a2)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v8, (a1)
- ; RV32-NEXT: lui a1, %hi(.LCPI20_7)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI20_7)
+ ; RV32-NEXT: lui a1, %hi(.LCPI21_7)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_7)
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle16.v v10, (a1)
; RV32-NEXT: csrr a1, vlenb
@@ -795,14 +809,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v0, v10
- ; RV32-NEXT: lui a1, %hi(.LCPI20_6)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI20_6)
- ; RV32-NEXT: lui a2, %hi(.LCPI20_8)
- ; RV32-NEXT: addi a2, a2, %lo(.LCPI20_8)
+ ; RV32-NEXT: lui a1, %hi(.LCPI21_6)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_6)
+ ; RV32-NEXT: lui a2, %hi(.LCPI21_8)
+ ; RV32-NEXT: addi a2, a2, %lo(.LCPI21_8)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v4, (a1)
- ; RV32-NEXT: lui a1, %hi(.LCPI20_9)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI20_9)
+ ; RV32-NEXT: lui a1, %hi(.LCPI21_9)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI21_9)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v6, (a1)
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -889,8 +903,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: li a4, 128
; RV64-NEXT: lui a1, 1
; RV64-NEXT: vle64.v v8, (a3)
- ; RV64-NEXT: lui a3, %hi(.LCPI20_0)
- ; RV64-NEXT: addi a3, a3, %lo(.LCPI20_0)
+ ; RV64-NEXT: lui a3, %hi(.LCPI21_0)
+ ; RV64-NEXT: addi a3, a3, %lo(.LCPI21_0)
; RV64-NEXT: vmv.s.x v0, a4
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: li a5, 61
@@ -1078,8 +1092,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
- ; RV64-NEXT: lui a2, %hi(.LCPI20_1)
- ; RV64-NEXT: addi a2, a2, %lo(.LCPI20_1)
+ ; RV64-NEXT: lui a2, %hi(.LCPI21_1)
+ ; RV64-NEXT: addi a2, a2, %lo(.LCPI21_1)
; RV64-NEXT: li a3, 192
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v6, (a2)
@@ -1113,8 +1127,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vrgatherei16.vv v24, v16, v6
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
- ; RV64-NEXT: lui a2, %hi(.LCPI20_2)
- ; RV64-NEXT: addi a2, a2, %lo(.LCPI20_2)
+ ; RV64-NEXT: lui a2, %hi(.LCPI21_2)
+ ; RV64-NEXT: addi a2, a2, %lo(.LCPI21_2)
; RV64-NEXT: li a3, 1040
; RV64-NEXT: vmv.s.x v0, a3
; RV64-NEXT: addi a1, a1, -2016
@@ -1198,12 +1212,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
- ; RV64-NEXT: lui a1, %hi(.LCPI20_3)
- ; RV64-NEXT: addi a1, a1, %lo(.LCPI20_3)
+ ; RV64-NEXT: lui a1, %hi(.LCPI21_3)
+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI21_3)
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v20, (a1)
- ; RV64-NEXT: lui a1, %hi(.LCPI20_4)
- ; RV64-NEXT: addi a1, a1, %lo(.LCPI20_4)
+ ; RV64-NEXT: lui a1, %hi(.LCPI21_4)
+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI21_4)
; RV64-NEXT: vle16.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 77
@@ -1254,8 +1268,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vrgatherei16.vv v0, v16, v8
- ; RV64-NEXT: lui a1, %hi(.LCPI20_5)
- ; RV64-NEXT: addi a1, a1, %lo(.LCPI20_5)
+ ; RV64-NEXT: lui a1, %hi(.LCPI21_5)
+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI21_5)
; RV64-NEXT: vle16.v v20, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 61
@@ -1472,6 +1486,19 @@ define void @vpstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
  ret void
}

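+ ; Store-side counterpart of the masked-load test above: a wide mask from
+ ; llvm.vector.interleave2 on the vp.store folds into a masked vsseg2.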
+ define void @vpstore_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i1> %m) {
+ ; CHECK-LABEL: vpstore_factor2_interleaved_mask_intrinsic:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+ ; CHECK-NEXT: vsseg2e32.v v8, (a0), v0.t
+ ; CHECK-NEXT: ret
+   %interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m)
+   %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+   tail call void @llvm.vp.store.v8i32.p0(<8 x i32> %interleaved.vec, ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+   ret void
+ }
+
+
define void @vpstore_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: vpstore_factor3:
; CHECK: # %bb.0:
@@ -1839,8 +1866,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
- ; RV32-NEXT: lui a1, %hi(.LCPI54_0)
- ; RV32-NEXT: addi a1, a1, %lo(.LCPI54_0)
+ ; RV32-NEXT: lui a1, %hi(.LCPI56_0)
+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI56_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1915,8 +1942,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
- ; RV32-NEXT: lui a0, %hi(.LCPI55_0)
- ; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0)
+ ; RV32-NEXT: lui a0, %hi(.LCPI57_0)
+ ; RV32-NEXT: addi a0, a0, %lo(.LCPI57_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36