@@ -126,6 +126,56 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_
126126 ret {<2 x i16 >, <2 x i16 >, <2 x i16 >, <2 x i16 >, <2 x i16 >, <2 x i16 >} %res5
127127}
128128
; Factor-7 deinterleaving load. A single <14 x i16> load is split by seven
; stride-7 shuffle masks into <2 x i16> fields, which are returned together as
; one struct. The expected assembly is a single vlseg7e16.v segment load.
; The result struct is seeded with poison instead of undef: all seven fields
; are overwritten below, so the placeholder never reaches the returned value.
define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor7(ptr %ptr) {
; CHECK-LABEL: load_factor7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <14 x i16>, ptr %ptr
  ; Field k takes lanes k and k + 7 of the wide vector.
  %v0 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 0, i32 7>
  %v1 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 1, i32 8>
  %v2 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 2, i32 9>
  %v3 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 3, i32 10>
  %v4 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 4, i32 11>
  %v5 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 5, i32 12>
  %v6 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 6, i32 13>
  ; Pack the seven fields into the returned aggregate, one index at a time.
  %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} poison, <2 x i16> %v0, 0
  %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
  %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
  %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
  %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
  %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
  %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
  ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6
}
152+
; Factor-8 deinterleaving load. A single <16 x i16> load is split by eight
; stride-8 shuffle masks into <2 x i16> fields, which are returned together as
; one struct. The expected assembly is a single vlseg8e16.v segment load.
; The result struct is seeded with poison instead of undef: all eight fields
; are overwritten below, so the placeholder never reaches the returned value.
define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor8(ptr %ptr) {
; CHECK-LABEL: load_factor8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <16 x i16>, ptr %ptr
  ; Field k takes lanes k and k + 8 of the wide vector.
  %v0 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 0, i32 8>
  %v1 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 1, i32 9>
  %v2 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 2, i32 10>
  %v3 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 3, i32 11>
  %v4 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 4, i32 12>
  %v5 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 5, i32 13>
  %v6 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 6, i32 14>
  %v7 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 7, i32 15>
  ; Pack the eight fields into the returned aggregate, one index at a time.
  %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} poison, <2 x i16> %v0, 0
  %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
  %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
  %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
  %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
  %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
  %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
  %res7 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6, <2 x i16> %v7, 7
  ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res7
}
178+
129179; LMUL * NF is > 8 here and so shouldn't be lowered to a vlseg
130180define {<8 x i64 >, <8 x i64 >, <8 x i64 >, <8 x i64 >, <8 x i64 >, <8 x i64 >} @load_factor6_too_big (ptr %ptr ) {
131181; RV32-LABEL: load_factor6_too_big:
@@ -174,12 +224,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
174224; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
175225; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
176226; RV32-NEXT: vslideup.vi v4, v8, 10, v0.t
177- ; RV32-NEXT: lui a4, %hi(.LCPI6_0 )
178- ; RV32-NEXT: addi a4, a4, %lo(.LCPI6_0 )
227+ ; RV32-NEXT: lui a4, %hi(.LCPI8_0 )
228+ ; RV32-NEXT: addi a4, a4, %lo(.LCPI8_0 )
179229; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
180230; RV32-NEXT: vle16.v v0, (a4)
181- ; RV32-NEXT: lui a4, %hi(.LCPI6_1 )
182- ; RV32-NEXT: addi a4, a4, %lo(.LCPI6_1 )
231+ ; RV32-NEXT: lui a4, %hi(.LCPI8_1 )
232+ ; RV32-NEXT: addi a4, a4, %lo(.LCPI8_1 )
183233; RV32-NEXT: lui a5, 1
184234; RV32-NEXT: vle16.v v8, (a4)
185235; RV32-NEXT: csrr a4, vlenb
@@ -260,10 +310,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
260310; RV32-NEXT: add a1, sp, a1
261311; RV32-NEXT: addi a1, a1, 16
262312; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
263- ; RV32-NEXT: lui a1, %hi(.LCPI6_2 )
264- ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2 )
265- ; RV32-NEXT: lui a3, %hi(.LCPI6_3 )
266- ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3 )
313+ ; RV32-NEXT: lui a1, %hi(.LCPI8_2 )
314+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_2 )
315+ ; RV32-NEXT: lui a3, %hi(.LCPI8_3 )
316+ ; RV32-NEXT: addi a3, a3, %lo(.LCPI8_3 )
267317; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
268318; RV32-NEXT: vle16.v v12, (a1)
269319; RV32-NEXT: vle16.v v8, (a3)
@@ -273,8 +323,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
273323; RV32-NEXT: add a1, sp, a1
274324; RV32-NEXT: addi a1, a1, 16
275325; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
276- ; RV32-NEXT: lui a1, %hi(.LCPI6_4 )
277- ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4 )
326+ ; RV32-NEXT: lui a1, %hi(.LCPI8_4 )
327+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_4 )
278328; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
279329; RV32-NEXT: vle16.v v2, (a1)
280330; RV32-NEXT: csrr a1, vlenb
@@ -340,10 +390,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
340390; RV32-NEXT: add a1, sp, a1
341391; RV32-NEXT: addi a1, a1, 16
342392; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
343- ; RV32-NEXT: lui a1, %hi(.LCPI6_5 )
344- ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5 )
345- ; RV32-NEXT: lui a3, %hi(.LCPI6_6 )
346- ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6 )
393+ ; RV32-NEXT: lui a1, %hi(.LCPI8_5 )
394+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_5 )
395+ ; RV32-NEXT: lui a3, %hi(.LCPI8_6 )
396+ ; RV32-NEXT: addi a3, a3, %lo(.LCPI8_6 )
347397; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
348398; RV32-NEXT: vle16.v v24, (a1)
349399; RV32-NEXT: vle16.v v4, (a3)
@@ -368,14 +418,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
368418; RV32-NEXT: add a1, sp, a1
369419; RV32-NEXT: addi a1, a1, 16
370420; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
371- ; RV32-NEXT: lui a1, %hi(.LCPI6_7 )
372- ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7 )
373- ; RV32-NEXT: lui a3, %hi(.LCPI6_8 )
374- ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_8 )
421+ ; RV32-NEXT: lui a1, %hi(.LCPI8_7 )
422+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_7 )
423+ ; RV32-NEXT: lui a3, %hi(.LCPI8_8 )
424+ ; RV32-NEXT: addi a3, a3, %lo(.LCPI8_8 )
375425; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
376426; RV32-NEXT: vle16.v v16, (a1)
377- ; RV32-NEXT: lui a1, %hi(.LCPI6_9 )
378- ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_9 )
427+ ; RV32-NEXT: lui a1, %hi(.LCPI8_9 )
428+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_9 )
379429; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
380430; RV32-NEXT: vle16.v v8, (a3)
381431; RV32-NEXT: csrr a3, vlenb
@@ -440,8 +490,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
440490; RV32-NEXT: add a1, sp, a1
441491; RV32-NEXT: addi a1, a1, 16
442492; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
443- ; RV32-NEXT: lui a1, %hi(.LCPI6_10 )
444- ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10 )
493+ ; RV32-NEXT: lui a1, %hi(.LCPI8_10 )
494+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_10 )
445495; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
446496; RV32-NEXT: vle16.v v12, (a1)
447497; RV32-NEXT: lui a1, 15
@@ -462,10 +512,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
462512; RV32-NEXT: addi a1, a1, 16
463513; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
464514; RV32-NEXT: vmv4r.v v24, v16
465- ; RV32-NEXT: lui a1, %hi(.LCPI6_11 )
466- ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11 )
467- ; RV32-NEXT: lui a3, %hi(.LCPI6_12 )
468- ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12 )
515+ ; RV32-NEXT: lui a1, %hi(.LCPI8_11 )
516+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_11 )
517+ ; RV32-NEXT: lui a3, %hi(.LCPI8_12 )
518+ ; RV32-NEXT: addi a3, a3, %lo(.LCPI8_12 )
469519; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
470520; RV32-NEXT: vle16.v v28, (a1)
471521; RV32-NEXT: vle16.v v4, (a3)
@@ -495,14 +545,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
495545; RV32-NEXT: add a1, sp, a1
496546; RV32-NEXT: addi a1, a1, 16
497547; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
498- ; RV32-NEXT: lui a1, %hi(.LCPI6_13 )
499- ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13 )
500- ; RV32-NEXT: lui a3, %hi(.LCPI6_14 )
501- ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_14 )
548+ ; RV32-NEXT: lui a1, %hi(.LCPI8_13 )
549+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_13 )
550+ ; RV32-NEXT: lui a3, %hi(.LCPI8_14 )
551+ ; RV32-NEXT: addi a3, a3, %lo(.LCPI8_14 )
502552; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
503553; RV32-NEXT: vle16.v v8, (a1)
504- ; RV32-NEXT: lui a1, %hi(.LCPI6_15 )
505- ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_15 )
554+ ; RV32-NEXT: lui a1, %hi(.LCPI8_15 )
555+ ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_15 )
506556; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
507557; RV32-NEXT: vle16.v v28, (a3)
508558; RV32-NEXT: vle16.v v12, (a1)
@@ -1131,3 +1181,82 @@ define void @store_factor6(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2
11311181 store <12 x i16 > %interleaved.vec , ptr %ptr
11321182 ret void
11331183}
1184+
1185+
; Factor-2 interleaved load in which only field 0 is used: lanes 0, 2, 4 and 6
; of an <8 x i32> load are extracted and returned. The expected assembly is a
; vlseg2e32.v segment load.
define <4 x i32> @load_factor2_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor2_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
  %wide = load <8 x i32>, ptr %ptr
  ; Keep every 2nd lane, starting at lane 0.
  %field0 = shufflevector <8 x i32> %wide, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret <4 x i32> %field0
}
1196+
1197+
; Factor-3 interleaved load in which only field 0 is used: lanes 0, 3, 6 and 9
; of a <12 x i32> load are extracted and returned. The expected assembly is a
; vlseg3e32.v segment load.
define <4 x i32> @load_factor3_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor3_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
  %wide = load <12 x i32>, ptr %ptr
  ; Keep every 3rd lane, starting at lane 0.
  %field0 = shufflevector <12 x i32> %wide, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  ret <4 x i32> %field0
}
1208+
; Factor-4 interleaved load in which only field 0 is used: lanes 0, 4, 8 and 12
; of a <16 x i32> load are extracted and returned. The expected assembly is a
; vlseg4e32.v segment load.
define <4 x i32> @load_factor4_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
  %wide = load <16 x i32>, ptr %ptr
  ; Keep every 4th lane, starting at lane 0.
  %field0 = shufflevector <16 x i32> %wide, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  ret <4 x i32> %field0
}
1219+
; Factor-5 interleaved load in which only field 0 is used: lanes 0, 5, 10 and
; 15 of a <20 x i32> load are extracted and returned. The expected assembly is
; a vlseg5e32.v segment load.
define <4 x i32> @load_factor5_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor5_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
  %wide = load <20 x i32>, ptr %ptr
  ; Keep every 5th lane, starting at lane 0.
  %field0 = shufflevector <20 x i32> %wide, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
  ret <4 x i32> %field0
}
1230+
; Factor-6 interleaved load in which only field 0 is used: lanes 0 and 6 of a
; <12 x i16> load are extracted and returned. The expected assembly is a
; vlseg6e16.v segment load.
define <2 x i16> @load_factor6_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor6_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
  %wide = load <12 x i16>, ptr %ptr
  ; Keep every 6th lane, starting at lane 0.
  %field0 = shufflevector <12 x i16> %wide, <12 x i16> poison, <2 x i32> <i32 0, i32 6>
  ret <2 x i16> %field0
}
1241+
; Factor-7 interleaved load in which only field 0 is used: lanes 0, 7, 14 and
; 21 of a <32 x i8> load are extracted and returned. The expected assembly is a
; vlseg7e8.v segment load. The function carries vscale_range(8,1024).
; NOTE(review): the wide load has 32 lanes, while four groups of seven span
; only 28 - confirm that the wider load is intentional.
define <4 x i8> @load_factor7_one_active(ptr %ptr) vscale_range(8,1024) {
; CHECK-LABEL: load_factor7_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: ret
  %wide = load <32 x i8>, ptr %ptr
  ; Keep every 7th lane, starting at lane 0.
  %field0 = shufflevector <32 x i8> %wide, <32 x i8> poison, <4 x i32> <i32 0, i32 7, i32 14, i32 21>
  ret <4 x i8> %field0
}
1252+
; Factor-8 interleaved load in which only field 0 is used: lanes 0, 8, 16 and
; 24 of a <32 x i8> load are extracted and returned. The expected assembly is a
; vlseg8e8.v segment load. The function carries vscale_range(8,1024).
define <4 x i8> @load_factor8_one_active(ptr %ptr) vscale_range(8,1024) {
; CHECK-LABEL: load_factor8_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; CHECK-NEXT: vlseg8e8.v v8, (a0)
; CHECK-NEXT: ret
  %wide = load <32 x i8>, ptr %ptr
  ; Keep every 8th lane, starting at lane 0.
  %field0 = shufflevector <32 x i8> %wide, <32 x i8> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
  ret <4 x i8> %field0
}
0 commit comments