@@ -257,6 +257,49 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_load_v2f64_v4f64(ptr %p
257257 ret {<2 x double >, <2 x double >} %res1
258258}
259259
; Factor-3 deinterleave of a fixed-length load: reads <24 x i8> from %p,
; splits it with llvm.vector.deinterleave3 and returns the three <8 x i8>
; fields in order (field N of the intrinsic result lands in field N of the
; returned aggregate).
; NOTE(review): the CHECK lines are autogenerated; after correcting the
; insertvalue indices the segment load is expected to target v8 so that the
; three fields occupy the return registers v8-v10. Re-run
; utils/update_llc_test_checks.py to confirm the exact output.
define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 1
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; CHECK-NEXT:    vsetivli zero, 24, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    add a1, a0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v12, v8, 8
; CHECK-NEXT:    vsetivli zero, 8, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 16
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v12, a0
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vmv1r.v v9, v10
; CHECK-NEXT:    vs2r.v v8, (a0)
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vlseg3e8.v v8, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %vec = load <24 x i8>, ptr %p
  %d0 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave3(<24 x i8> %vec)
  %t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
  %t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
  %t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
  ; Each field is inserted at its own index so no result field is left poison.
  %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
  %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
  %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %res2
}
302+
260303define { <8 x i8 >, <8 x i8 >, <8 x i8 >, <8 x i8 > } @vector_deinterleave_load_factor4 (ptr %p ) {
261304; CHECK-LABEL: vector_deinterleave_load_factor4:
262305; CHECK: # %bb.0:
@@ -281,6 +324,127 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_fact
281324 ret { <8 x i8 >, <8 x i8 >, <8 x i8 >, <8 x i8 > } %res3
282325}
283326
; Factor-5 deinterleave of a fixed-length load: reads <40 x i8> from %p,
; splits it with llvm.vector.deinterleave5 and returns the five <8 x i8>
; fields in their original order. The expected lowering below reassembles the
; loaded data in a stack slot and reloads it with a single vlseg5e8.v.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor5(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 2
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; CHECK-NEXT:    li a1, 40
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    add a1, a0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v12, v8, 24
; CHECK-NEXT:    vslidedown.vi v14, v8, 16
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v13, v8, 8
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v14, v12, a0
; CHECK-NEXT:    vmv1r.v v12, v8
; CHECK-NEXT:    vslideup.vx v12, v13, a0
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetivli zero, 8, e8, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a0
; CHECK-NEXT:    vmv1r.v v13, v14
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vmv2r.v v14, v8
; CHECK-NEXT:    vs4r.v v12, (a0)
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vlseg5e8.v v8, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %wide = load <40 x i8>, ptr %p
  %fields = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave5(<40 x i8> %wide)

  ; Pull the five fields out of the intrinsic result.
  %f0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %fields, 0
  %f1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %fields, 1
  %f2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %fields, 2
  %f3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %fields, 3
  %f4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %fields, 4

  ; Rebuild the return aggregate one field at a time, same index in and out.
  %agg0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %f0, 0
  %agg1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %agg0, <8 x i8> %f1, 1
  %agg2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %agg1, <8 x i8> %f2, 2
  %agg3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %agg2, <8 x i8> %f3, 3
  %agg4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %agg3, <8 x i8> %f4, 4
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %agg4
}
381+
; Factor-7 deinterleave of a fixed-length load: reads <56 x i8> from %p,
; splits it with llvm.vector.deinterleave7 and returns all seven <8 x i8>
; fields in order. The insertvalue chain threads every intermediate aggregate
; into the next one so that no field of the returned value is left poison.
; NOTE(review): the CHECK lines are autogenerated; vlseg7e8.v v8 already
; defines v8-v14, so the expected assembly should be unaffected by the chain
; fix, but re-run utils/update_llc_test_checks.py to confirm.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor7(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 2
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; CHECK-NEXT:    li a1, 56
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 40
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetivli zero, 8, e8, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a1
; CHECK-NEXT:    li a1, 48
; CHECK-NEXT:    srli a0, a0, 1
; CHECK-NEXT:    vslidedown.vx v12, v8, a2
; CHECK-NEXT:    add a2, a0, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v14, v8, 24
; CHECK-NEXT:    vslidedown.vi v18, v8, 16
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v13, v8, 8
; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v18, v14, a0
; CHECK-NEXT:    vsetivli zero, 8, e8, m4, ta, ma
; CHECK-NEXT:    vslidedown.vx v20, v8, a1
; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT:    vslideup.vx v8, v13, a0
; CHECK-NEXT:    vslideup.vx v12, v16, a0
; CHECK-NEXT:    vmv1r.v v9, v18
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vmv1r.v v13, v20
; CHECK-NEXT:    vmv2r.v v10, v12
; CHECK-NEXT:    vs4r.v v8, (a0)
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vlseg7e8.v v8, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %vec = load <56 x i8>, ptr %p
  %d0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave7(<56 x i8> %vec)
  %t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
  %t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
  %t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
  %t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
  %t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4
  %t5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 5
  %t6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 6
  ; Every insertvalue builds on the immediately preceding aggregate.
  %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
  %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
  %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
  %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
  %res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4
  %res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5
  %res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5, <8 x i8> %t6, 6
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6
}
447+
284448define {<2 x i32 >, <2 x i32 >, <2 x i32 >, <2 x i32 >, <2 x i32 >, <2 x i32 >, <2 x i32 >, <2 x i32 >} @vector_deinterleave_load_factor8 (ptr %ptr ) {
285449; CHECK-LABEL: vector_deinterleave_load_factor8:
286450; CHECK: # %bb.0:
0 commit comments