@@ -207,6 +207,22 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) {
207207 ret {<4 x i32 >, <4 x i32 >, <4 x i32 >} %res2
208208}
209209
210+ ; Only fields 0 and 2 of the factor-3 interleaved vp.load are extracted (field 1 is unused); this should still lower to a single vlseg3e32.v.
211+ define {<4 x i32 >, <4 x i32 >} @vpload_factor3_partial (ptr %ptr ) {
212+ ; CHECK-LABEL: vpload_factor3_partial:
213+ ; CHECK: # %bb.0:
214+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
215+ ; CHECK-NEXT: vlseg3e32.v v7, (a0)
216+ ; CHECK-NEXT: vmv1r.v v8, v7
217+ ; CHECK-NEXT: ret
218+ %interleaved.vec = tail call <12 x i32 > @llvm.vp.load.v12i32.p0 (ptr %ptr , <12 x i1 > splat (i1 true ), i32 12 )
219+ %v0 = shufflevector <12 x i32 > %interleaved.vec , <12 x i32 > poison, <4 x i32 > <i32 0 , i32 3 , i32 6 , i32 9 >
220+ %v2 = shufflevector <12 x i32 > %interleaved.vec , <12 x i32 > poison, <4 x i32 > <i32 2 , i32 5 , i32 8 , i32 11 >
221+ %res0 = insertvalue {<4 x i32 >, <4 x i32 >} poison, <4 x i32 > %v0 , 0
222+ %res1 = insertvalue {<4 x i32 >, <4 x i32 >} %res0 , <4 x i32 > %v2 , 1
223+ ret {<4 x i32 >, <4 x i32 >} %res1
224+ }
225+
210226; Load a larger vector but only deinterleave a subset of the elements.
211227define {<4 x i32 >, <4 x i32 >, <4 x i32 >} @vpload_factor3_v16i32 (ptr %ptr ) {
212228; CHECK-LABEL: vpload_factor3_v16i32:
@@ -224,6 +240,7 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_v16i32(ptr %ptr) {
224240 ret {<4 x i32 >, <4 x i32 >, <4 x i32 >} %res2
225241}
226242
243+ ; Make sure the mask is propagated.
227244define {<4 x i32 >, <4 x i32 >, <4 x i32 >} @vpload_factor3_mask (ptr %ptr ) {
228245; CHECK-LABEL: vpload_factor3_mask:
229246; CHECK: # %bb.0:
@@ -241,6 +258,24 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_mask(ptr %ptr) {
241258 ret {<4 x i32 >, <4 x i32 >, <4 x i32 >} %res2
242259}
243260
261+ ; A poison lane in a de-interleaving shuffle mask (%v1, lane 2) must not block lowering to the masked vlseg3e32.v.
262+ define {<4 x i32 >, <4 x i32 >, <4 x i32 >} @vpload_factor3_poison_shufflemask (ptr %ptr ) {
263+ ; CHECK-LABEL: vpload_factor3_poison_shufflemask:
264+ ; CHECK: # %bb.0:
265+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
266+ ; CHECK-NEXT: vmv.v.i v0, 10
267+ ; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
268+ ; CHECK-NEXT: ret
269+ %interleaved.vec = tail call <12 x i32 > @llvm.vp.load.v12i32.p0 (ptr %ptr , <12 x i1 > <i1 0 , i1 0 , i1 0 , i1 1 , i1 1 , i1 1 , i1 0 , i1 0 , i1 0 , i1 1 , i1 1 , i1 1 >, i32 12 )
270+ %v0 = shufflevector <12 x i32 > %interleaved.vec , <12 x i32 > poison, <4 x i32 > <i32 0 , i32 3 , i32 6 , i32 9 >
271+ %v1 = shufflevector <12 x i32 > %interleaved.vec , <12 x i32 > poison, <4 x i32 > <i32 1 , i32 4 , i32 poison, i32 10 >
272+ %v2 = shufflevector <12 x i32 > %interleaved.vec , <12 x i32 > poison, <4 x i32 > <i32 2 , i32 5 , i32 8 , i32 11 >
273+ %res0 = insertvalue {<4 x i32 >, <4 x i32 >, <4 x i32 >} poison, <4 x i32 > %v0 , 0
274+ %res1 = insertvalue {<4 x i32 >, <4 x i32 >, <4 x i32 >} %res0 , <4 x i32 > %v1 , 1
275+ %res2 = insertvalue {<4 x i32 >, <4 x i32 >, <4 x i32 >} %res1 , <4 x i32 > %v2 , 2
276+ ret {<4 x i32 >, <4 x i32 >, <4 x i32 >} %res2
277+ }
278+
244279define {<4 x i32 >, <4 x i32 >, <4 x i32 >, <4 x i32 >} @vpload_factor4 (ptr %ptr ) {
245280; CHECK-LABEL: vpload_factor4:
246281; CHECK: # %bb.0:
@@ -367,8 +402,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
367402; RV32-NEXT: li a2, 32
368403; RV32-NEXT: lui a3, 12
369404; RV32-NEXT: lui a6, 12291
370- ; RV32-NEXT: lui a7, %hi(.LCPI17_0 )
371- ; RV32-NEXT: addi a7, a7, %lo(.LCPI17_0 )
405+ ; RV32-NEXT: lui a7, %hi(.LCPI19_0 )
406+ ; RV32-NEXT: addi a7, a7, %lo(.LCPI19_0 )
372407; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
373408; RV32-NEXT: vle32.v v24, (a5)
374409; RV32-NEXT: vmv.s.x v0, a3
@@ -453,12 +488,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
453488; RV32-NEXT: addi a1, a1, 16
454489; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
455490; RV32-NEXT: lui a7, 49164
456- ; RV32-NEXT: lui a1, %hi(.LCPI17_1 )
457- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_1 )
491+ ; RV32-NEXT: lui a1, %hi(.LCPI19_1 )
492+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_1 )
458493; RV32-NEXT: lui t2, 3
459494; RV32-NEXT: lui t1, 196656
460- ; RV32-NEXT: lui a4, %hi(.LCPI17_3 )
461- ; RV32-NEXT: addi a4, a4, %lo(.LCPI17_3 )
495+ ; RV32-NEXT: lui a4, %hi(.LCPI19_3 )
496+ ; RV32-NEXT: addi a4, a4, %lo(.LCPI19_3 )
462497; RV32-NEXT: lui t0, 786624
463498; RV32-NEXT: li a5, 48
464499; RV32-NEXT: lui a6, 768
@@ -637,8 +672,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
637672; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
638673; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
639674; RV32-NEXT: vrgatherei16.vv v24, v8, v2
640- ; RV32-NEXT: lui a1, %hi(.LCPI17_2 )
641- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_2 )
675+ ; RV32-NEXT: lui a1, %hi(.LCPI19_2 )
676+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_2 )
642677; RV32-NEXT: lui a3, 3073
643678; RV32-NEXT: addi a3, a3, -1024
644679; RV32-NEXT: vmv.s.x v0, a3
@@ -702,16 +737,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
702737; RV32-NEXT: vrgatherei16.vv v28, v8, v3
703738; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
704739; RV32-NEXT: vmv.v.v v28, v24
705- ; RV32-NEXT: lui a1, %hi(.LCPI17_4 )
706- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_4 )
707- ; RV32-NEXT: lui a2, %hi(.LCPI17_5 )
708- ; RV32-NEXT: addi a2, a2, %lo(.LCPI17_5 )
740+ ; RV32-NEXT: lui a1, %hi(.LCPI19_4 )
741+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_4 )
742+ ; RV32-NEXT: lui a2, %hi(.LCPI19_5 )
743+ ; RV32-NEXT: addi a2, a2, %lo(.LCPI19_5 )
709744; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
710745; RV32-NEXT: vle16.v v24, (a2)
711746; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
712747; RV32-NEXT: vle16.v v8, (a1)
713- ; RV32-NEXT: lui a1, %hi(.LCPI17_7 )
714- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_7 )
748+ ; RV32-NEXT: lui a1, %hi(.LCPI19_7 )
749+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_7 )
715750; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
716751; RV32-NEXT: vle16.v v10, (a1)
717752; RV32-NEXT: csrr a1, vlenb
@@ -739,14 +774,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
739774; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
740775; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
741776; RV32-NEXT: vrgatherei16.vv v16, v0, v10
742- ; RV32-NEXT: lui a1, %hi(.LCPI17_6 )
743- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_6 )
744- ; RV32-NEXT: lui a2, %hi(.LCPI17_8 )
745- ; RV32-NEXT: addi a2, a2, %lo(.LCPI17_8 )
777+ ; RV32-NEXT: lui a1, %hi(.LCPI19_6 )
778+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_6 )
779+ ; RV32-NEXT: lui a2, %hi(.LCPI19_8 )
780+ ; RV32-NEXT: addi a2, a2, %lo(.LCPI19_8 )
746781; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
747782; RV32-NEXT: vle16.v v4, (a1)
748- ; RV32-NEXT: lui a1, %hi(.LCPI17_9 )
749- ; RV32-NEXT: addi a1, a1, %lo(.LCPI17_9 )
783+ ; RV32-NEXT: lui a1, %hi(.LCPI19_9 )
784+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI19_9 )
750785; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
751786; RV32-NEXT: vle16.v v6, (a1)
752787; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -833,8 +868,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
833868; RV64-NEXT: li a4, 128
834869; RV64-NEXT: lui a1, 1
835870; RV64-NEXT: vle64.v v8, (a3)
836- ; RV64-NEXT: lui a3, %hi(.LCPI17_0 )
837- ; RV64-NEXT: addi a3, a3, %lo(.LCPI17_0 )
871+ ; RV64-NEXT: lui a3, %hi(.LCPI19_0 )
872+ ; RV64-NEXT: addi a3, a3, %lo(.LCPI19_0 )
838873; RV64-NEXT: vmv.s.x v0, a4
839874; RV64-NEXT: csrr a4, vlenb
840875; RV64-NEXT: li a5, 61
@@ -1022,8 +1057,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
10221057; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
10231058; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
10241059; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
1025- ; RV64-NEXT: lui a2, %hi(.LCPI17_1 )
1026- ; RV64-NEXT: addi a2, a2, %lo(.LCPI17_1 )
1060+ ; RV64-NEXT: lui a2, %hi(.LCPI19_1 )
1061+ ; RV64-NEXT: addi a2, a2, %lo(.LCPI19_1 )
10271062; RV64-NEXT: li a3, 192
10281063; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
10291064; RV64-NEXT: vle16.v v6, (a2)
@@ -1057,8 +1092,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
10571092; RV64-NEXT: vrgatherei16.vv v24, v16, v6
10581093; RV64-NEXT: addi a2, sp, 16
10591094; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
1060- ; RV64-NEXT: lui a2, %hi(.LCPI17_2 )
1061- ; RV64-NEXT: addi a2, a2, %lo(.LCPI17_2 )
1095+ ; RV64-NEXT: lui a2, %hi(.LCPI19_2 )
1096+ ; RV64-NEXT: addi a2, a2, %lo(.LCPI19_2 )
10621097; RV64-NEXT: li a3, 1040
10631098; RV64-NEXT: vmv.s.x v0, a3
10641099; RV64-NEXT: addi a1, a1, -2016
@@ -1142,12 +1177,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
11421177; RV64-NEXT: add a1, sp, a1
11431178; RV64-NEXT: addi a1, a1, 16
11441179; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
1145- ; RV64-NEXT: lui a1, %hi(.LCPI17_3 )
1146- ; RV64-NEXT: addi a1, a1, %lo(.LCPI17_3 )
1180+ ; RV64-NEXT: lui a1, %hi(.LCPI19_3 )
1181+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI19_3 )
11471182; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
11481183; RV64-NEXT: vle16.v v20, (a1)
1149- ; RV64-NEXT: lui a1, %hi(.LCPI17_4 )
1150- ; RV64-NEXT: addi a1, a1, %lo(.LCPI17_4 )
1184+ ; RV64-NEXT: lui a1, %hi(.LCPI19_4 )
1185+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI19_4 )
11511186; RV64-NEXT: vle16.v v8, (a1)
11521187; RV64-NEXT: csrr a1, vlenb
11531188; RV64-NEXT: li a2, 77
@@ -1198,8 +1233,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
11981233; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
11991234; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
12001235; RV64-NEXT: vrgatherei16.vv v0, v16, v8
1201- ; RV64-NEXT: lui a1, %hi(.LCPI17_5 )
1202- ; RV64-NEXT: addi a1, a1, %lo(.LCPI17_5 )
1236+ ; RV64-NEXT: lui a1, %hi(.LCPI19_5 )
1237+ ; RV64-NEXT: addi a1, a1, %lo(.LCPI19_5 )
12031238; RV64-NEXT: vle16.v v20, (a1)
12041239; RV64-NEXT: csrr a1, vlenb
12051240; RV64-NEXT: li a2, 61
@@ -1643,8 +1678,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
16431678; RV32-NEXT: vle32.v v12, (a0), v0.t
16441679; RV32-NEXT: li a0, 36
16451680; RV32-NEXT: vmv.s.x v20, a1
1646- ; RV32-NEXT: lui a1, %hi(.LCPI42_0 )
1647- ; RV32-NEXT: addi a1, a1, %lo(.LCPI42_0 )
1681+ ; RV32-NEXT: lui a1, %hi(.LCPI44_0 )
1682+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI44_0 )
16481683; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
16491684; RV32-NEXT: vle16.v v21, (a1)
16501685; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1719,8 +1754,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
17191754; RV32-NEXT: vmv.s.x v10, a0
17201755; RV32-NEXT: li a0, 146
17211756; RV32-NEXT: vmv.s.x v11, a0
1722- ; RV32-NEXT: lui a0, %hi(.LCPI43_0 )
1723- ; RV32-NEXT: addi a0, a0, %lo(.LCPI43_0 )
1757+ ; RV32-NEXT: lui a0, %hi(.LCPI45_0 )
1758+ ; RV32-NEXT: addi a0, a0, %lo(.LCPI45_0 )
17241759; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
17251760; RV32-NEXT: vle16.v v20, (a0)
17261761; RV32-NEXT: li a0, 36
0 commit comments