@@ -308,3 +308,244 @@ define <32 x i32> @v32i32_v4i32(<4 x i32>) {
308308 %2 = shufflevector <4 x i32 > %0 , <4 x i32 > poison, <32 x i32 > <i32 2 , i32 3 , i32 0 , i32 2 , i32 3 , i32 0 , i32 1 , i32 2 , i32 3 , i32 0 , i32 2 , i32 3 , i32 0 , i32 1 , i32 1 , i32 2 , i32 0 , i32 3 , i32 1 , i32 1 , i32 2 , i32 0 , i32 3 , i32 1 , i32 2 , i32 0 , i32 3 , i32 1 , i32 1 , i32 2 , i32 0 , i32 3 >
309309 ret <32 x i32 > %2
310310}
311+
; Deinterleaving shuffle: the result keeps the odd-indexed bytes (1, 3, ..., 63)
; of %in, halving the element count from 64 to 32.
; TODO: This case should be a simple vnsrl, but gets scalarized instead. The
; checks below pin the current lowering: the source is stored to a 64-byte
; aligned stack slot, elements 1..31 are extracted with vslidedown.vi and
; vmv.x.s, elements 33..63 are reloaded from the stack with lbu, and the result
; is assembled one byte at a time with vmv.v.x and vslide1down.vx.
define <32 x i8> @vnsrl_v32i8_v64i8(<64 x i8> %in) {
; RV32-LABEL: vnsrl_v32i8_v64i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -128
; RV32-NEXT:    .cfi_def_cfa_offset 128
; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 128
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -64
; RV32-NEXT:    li a0, 64
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT:    vse8.v v8, (a1)
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a0
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 7
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 9
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 11
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 13
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 15
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslidedown.vi v12, v8, 17
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslidedown.vi v12, v8, 19
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslidedown.vi v12, v8, 21
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslidedown.vi v12, v8, 23
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslidedown.vi v12, v8, 25
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslidedown.vi v12, v8, 27
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslidedown.vi v12, v8, 29
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslidedown.vi v8, v8, 31
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslide1down.vx v8, v10, a0
; RV32-NEXT:    lbu a0, 33(sp)
; RV32-NEXT:    lbu a1, 35(sp)
; RV32-NEXT:    lbu a2, 37(sp)
; RV32-NEXT:    lbu a3, 39(sp)
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    lbu a0, 41(sp)
; RV32-NEXT:    lbu a1, 43(sp)
; RV32-NEXT:    lbu a2, 45(sp)
; RV32-NEXT:    lbu a3, 47(sp)
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    lbu a0, 49(sp)
; RV32-NEXT:    lbu a1, 51(sp)
; RV32-NEXT:    lbu a2, 53(sp)
; RV32-NEXT:    lbu a3, 55(sp)
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    lbu a0, 57(sp)
; RV32-NEXT:    lbu a1, 59(sp)
; RV32-NEXT:    lbu a2, 61(sp)
; RV32-NEXT:    lbu a3, 63(sp)
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    addi sp, s0, -128
; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 128
; RV32-NEXT:    ret
;
; RV64-LABEL: vnsrl_v32i8_v64i8:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    li a0, 64
; RV64-NEXT:    mv a1, sp
; RV64-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; RV64-NEXT:    vse8.v v8, (a1)
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a0, v10
; RV64-NEXT:    li a1, 32
; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a0
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 5
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 7
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 9
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 11
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 13
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 15
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslidedown.vi v12, v8, 17
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslidedown.vi v12, v8, 19
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslidedown.vi v12, v8, 21
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslidedown.vi v12, v8, 23
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslidedown.vi v12, v8, 25
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslidedown.vi v12, v8, 27
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslidedown.vi v12, v8, 29
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslidedown.vi v8, v8, 31
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslide1down.vx v8, v10, a0
; RV64-NEXT:    lbu a0, 33(sp)
; RV64-NEXT:    lbu a1, 35(sp)
; RV64-NEXT:    lbu a2, 37(sp)
; RV64-NEXT:    lbu a3, 39(sp)
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    lbu a0, 41(sp)
; RV64-NEXT:    lbu a1, 43(sp)
; RV64-NEXT:    lbu a2, 45(sp)
; RV64-NEXT:    lbu a3, 47(sp)
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    lbu a0, 49(sp)
; RV64-NEXT:    lbu a1, 51(sp)
; RV64-NEXT:    lbu a2, 53(sp)
; RV64-NEXT:    lbu a3, 55(sp)
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    lbu a0, 57(sp)
; RV64-NEXT:    lbu a1, 59(sp)
; RV64-NEXT:    lbu a2, 61(sp)
; RV64-NEXT:    lbu a3, 63(sp)
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    ret
  %res = shufflevector <64 x i8> %in, <64 x i8> poison, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
  ret <32 x i8> %res
}
0 commit comments