@@ -307,243 +307,14 @@ define <32 x i32> @v32i32_v4i32(<4 x i32>) {
307307 ret <32 x i32 > %2
308308}
309309
310- ; TODO: This case should be a simple vnsrl, but gets scalarized instead
311310define <32 x i8 > @vnsrl_v32i8_v64i8 (<64 x i8 > %in ) {
312- ; RV32-LABEL: vnsrl_v32i8_v64i8:
313- ; RV32: # %bb.0:
314- ; RV32-NEXT: addi sp, sp, -128
315- ; RV32-NEXT: .cfi_def_cfa_offset 128
316- ; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
317- ; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
318- ; RV32-NEXT: .cfi_offset ra, -4
319- ; RV32-NEXT: .cfi_offset s0, -8
320- ; RV32-NEXT: addi s0, sp, 128
321- ; RV32-NEXT: .cfi_def_cfa s0, 0
322- ; RV32-NEXT: andi sp, sp, -64
323- ; RV32-NEXT: li a0, 64
324- ; RV32-NEXT: mv a1, sp
325- ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
326- ; RV32-NEXT: vse8.v v8, (a1)
327- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
328- ; RV32-NEXT: vslidedown.vi v10, v8, 1
329- ; RV32-NEXT: vmv.x.s a0, v10
330- ; RV32-NEXT: li a1, 32
331- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
332- ; RV32-NEXT: vmv.v.x v10, a0
333- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
334- ; RV32-NEXT: vslidedown.vi v12, v8, 3
335- ; RV32-NEXT: vmv.x.s a0, v12
336- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
337- ; RV32-NEXT: vslide1down.vx v10, v10, a0
338- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
339- ; RV32-NEXT: vslidedown.vi v12, v8, 5
340- ; RV32-NEXT: vmv.x.s a0, v12
341- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
342- ; RV32-NEXT: vslide1down.vx v10, v10, a0
343- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
344- ; RV32-NEXT: vslidedown.vi v12, v8, 7
345- ; RV32-NEXT: vmv.x.s a0, v12
346- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
347- ; RV32-NEXT: vslide1down.vx v10, v10, a0
348- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
349- ; RV32-NEXT: vslidedown.vi v12, v8, 9
350- ; RV32-NEXT: vmv.x.s a0, v12
351- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
352- ; RV32-NEXT: vslide1down.vx v10, v10, a0
353- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
354- ; RV32-NEXT: vslidedown.vi v12, v8, 11
355- ; RV32-NEXT: vmv.x.s a0, v12
356- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
357- ; RV32-NEXT: vslide1down.vx v10, v10, a0
358- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
359- ; RV32-NEXT: vslidedown.vi v12, v8, 13
360- ; RV32-NEXT: vmv.x.s a0, v12
361- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
362- ; RV32-NEXT: vslide1down.vx v10, v10, a0
363- ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
364- ; RV32-NEXT: vslidedown.vi v12, v8, 15
365- ; RV32-NEXT: vmv.x.s a0, v12
366- ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
367- ; RV32-NEXT: vslide1down.vx v10, v10, a0
368- ; RV32-NEXT: vslidedown.vi v12, v8, 17
369- ; RV32-NEXT: vmv.x.s a0, v12
370- ; RV32-NEXT: vslide1down.vx v10, v10, a0
371- ; RV32-NEXT: vslidedown.vi v12, v8, 19
372- ; RV32-NEXT: vmv.x.s a0, v12
373- ; RV32-NEXT: vslide1down.vx v10, v10, a0
374- ; RV32-NEXT: vslidedown.vi v12, v8, 21
375- ; RV32-NEXT: vmv.x.s a0, v12
376- ; RV32-NEXT: vslide1down.vx v10, v10, a0
377- ; RV32-NEXT: vslidedown.vi v12, v8, 23
378- ; RV32-NEXT: vmv.x.s a0, v12
379- ; RV32-NEXT: vslide1down.vx v10, v10, a0
380- ; RV32-NEXT: vslidedown.vi v12, v8, 25
381- ; RV32-NEXT: vmv.x.s a0, v12
382- ; RV32-NEXT: vslide1down.vx v10, v10, a0
383- ; RV32-NEXT: vslidedown.vi v12, v8, 27
384- ; RV32-NEXT: vmv.x.s a0, v12
385- ; RV32-NEXT: vslide1down.vx v10, v10, a0
386- ; RV32-NEXT: vslidedown.vi v12, v8, 29
387- ; RV32-NEXT: vmv.x.s a0, v12
388- ; RV32-NEXT: vslide1down.vx v10, v10, a0
389- ; RV32-NEXT: vslidedown.vi v8, v8, 31
390- ; RV32-NEXT: vmv.x.s a0, v8
391- ; RV32-NEXT: vslide1down.vx v8, v10, a0
392- ; RV32-NEXT: lbu a0, 33(sp)
393- ; RV32-NEXT: lbu a1, 35(sp)
394- ; RV32-NEXT: lbu a2, 37(sp)
395- ; RV32-NEXT: lbu a3, 39(sp)
396- ; RV32-NEXT: vslide1down.vx v8, v8, a0
397- ; RV32-NEXT: vslide1down.vx v8, v8, a1
398- ; RV32-NEXT: vslide1down.vx v8, v8, a2
399- ; RV32-NEXT: vslide1down.vx v8, v8, a3
400- ; RV32-NEXT: lbu a0, 41(sp)
401- ; RV32-NEXT: lbu a1, 43(sp)
402- ; RV32-NEXT: lbu a2, 45(sp)
403- ; RV32-NEXT: lbu a3, 47(sp)
404- ; RV32-NEXT: vslide1down.vx v8, v8, a0
405- ; RV32-NEXT: vslide1down.vx v8, v8, a1
406- ; RV32-NEXT: vslide1down.vx v8, v8, a2
407- ; RV32-NEXT: vslide1down.vx v8, v8, a3
408- ; RV32-NEXT: lbu a0, 49(sp)
409- ; RV32-NEXT: lbu a1, 51(sp)
410- ; RV32-NEXT: lbu a2, 53(sp)
411- ; RV32-NEXT: lbu a3, 55(sp)
412- ; RV32-NEXT: vslide1down.vx v8, v8, a0
413- ; RV32-NEXT: vslide1down.vx v8, v8, a1
414- ; RV32-NEXT: vslide1down.vx v8, v8, a2
415- ; RV32-NEXT: vslide1down.vx v8, v8, a3
416- ; RV32-NEXT: lbu a0, 57(sp)
417- ; RV32-NEXT: lbu a1, 59(sp)
418- ; RV32-NEXT: lbu a2, 61(sp)
419- ; RV32-NEXT: lbu a3, 63(sp)
420- ; RV32-NEXT: vslide1down.vx v8, v8, a0
421- ; RV32-NEXT: vslide1down.vx v8, v8, a1
422- ; RV32-NEXT: vslide1down.vx v8, v8, a2
423- ; RV32-NEXT: vslide1down.vx v8, v8, a3
424- ; RV32-NEXT: addi sp, s0, -128
425- ; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
426- ; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
427- ; RV32-NEXT: addi sp, sp, 128
428- ; RV32-NEXT: ret
429- ;
430- ; RV64-LABEL: vnsrl_v32i8_v64i8:
431- ; RV64: # %bb.0:
432- ; RV64-NEXT: addi sp, sp, -128
433- ; RV64-NEXT: .cfi_def_cfa_offset 128
434- ; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
435- ; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
436- ; RV64-NEXT: .cfi_offset ra, -8
437- ; RV64-NEXT: .cfi_offset s0, -16
438- ; RV64-NEXT: addi s0, sp, 128
439- ; RV64-NEXT: .cfi_def_cfa s0, 0
440- ; RV64-NEXT: andi sp, sp, -64
441- ; RV64-NEXT: li a0, 64
442- ; RV64-NEXT: mv a1, sp
443- ; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma
444- ; RV64-NEXT: vse8.v v8, (a1)
445- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
446- ; RV64-NEXT: vslidedown.vi v10, v8, 1
447- ; RV64-NEXT: vmv.x.s a0, v10
448- ; RV64-NEXT: li a1, 32
449- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
450- ; RV64-NEXT: vmv.v.x v10, a0
451- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
452- ; RV64-NEXT: vslidedown.vi v12, v8, 3
453- ; RV64-NEXT: vmv.x.s a0, v12
454- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
455- ; RV64-NEXT: vslide1down.vx v10, v10, a0
456- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
457- ; RV64-NEXT: vslidedown.vi v12, v8, 5
458- ; RV64-NEXT: vmv.x.s a0, v12
459- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
460- ; RV64-NEXT: vslide1down.vx v10, v10, a0
461- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
462- ; RV64-NEXT: vslidedown.vi v12, v8, 7
463- ; RV64-NEXT: vmv.x.s a0, v12
464- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
465- ; RV64-NEXT: vslide1down.vx v10, v10, a0
466- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
467- ; RV64-NEXT: vslidedown.vi v12, v8, 9
468- ; RV64-NEXT: vmv.x.s a0, v12
469- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
470- ; RV64-NEXT: vslide1down.vx v10, v10, a0
471- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
472- ; RV64-NEXT: vslidedown.vi v12, v8, 11
473- ; RV64-NEXT: vmv.x.s a0, v12
474- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
475- ; RV64-NEXT: vslide1down.vx v10, v10, a0
476- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
477- ; RV64-NEXT: vslidedown.vi v12, v8, 13
478- ; RV64-NEXT: vmv.x.s a0, v12
479- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
480- ; RV64-NEXT: vslide1down.vx v10, v10, a0
481- ; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
482- ; RV64-NEXT: vslidedown.vi v12, v8, 15
483- ; RV64-NEXT: vmv.x.s a0, v12
484- ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
485- ; RV64-NEXT: vslide1down.vx v10, v10, a0
486- ; RV64-NEXT: vslidedown.vi v12, v8, 17
487- ; RV64-NEXT: vmv.x.s a0, v12
488- ; RV64-NEXT: vslide1down.vx v10, v10, a0
489- ; RV64-NEXT: vslidedown.vi v12, v8, 19
490- ; RV64-NEXT: vmv.x.s a0, v12
491- ; RV64-NEXT: vslide1down.vx v10, v10, a0
492- ; RV64-NEXT: vslidedown.vi v12, v8, 21
493- ; RV64-NEXT: vmv.x.s a0, v12
494- ; RV64-NEXT: vslide1down.vx v10, v10, a0
495- ; RV64-NEXT: vslidedown.vi v12, v8, 23
496- ; RV64-NEXT: vmv.x.s a0, v12
497- ; RV64-NEXT: vslide1down.vx v10, v10, a0
498- ; RV64-NEXT: vslidedown.vi v12, v8, 25
499- ; RV64-NEXT: vmv.x.s a0, v12
500- ; RV64-NEXT: vslide1down.vx v10, v10, a0
501- ; RV64-NEXT: vslidedown.vi v12, v8, 27
502- ; RV64-NEXT: vmv.x.s a0, v12
503- ; RV64-NEXT: vslide1down.vx v10, v10, a0
504- ; RV64-NEXT: vslidedown.vi v12, v8, 29
505- ; RV64-NEXT: vmv.x.s a0, v12
506- ; RV64-NEXT: vslide1down.vx v10, v10, a0
507- ; RV64-NEXT: vslidedown.vi v8, v8, 31
508- ; RV64-NEXT: vmv.x.s a0, v8
509- ; RV64-NEXT: vslide1down.vx v8, v10, a0
510- ; RV64-NEXT: lbu a0, 33(sp)
511- ; RV64-NEXT: lbu a1, 35(sp)
512- ; RV64-NEXT: lbu a2, 37(sp)
513- ; RV64-NEXT: lbu a3, 39(sp)
514- ; RV64-NEXT: vslide1down.vx v8, v8, a0
515- ; RV64-NEXT: vslide1down.vx v8, v8, a1
516- ; RV64-NEXT: vslide1down.vx v8, v8, a2
517- ; RV64-NEXT: vslide1down.vx v8, v8, a3
518- ; RV64-NEXT: lbu a0, 41(sp)
519- ; RV64-NEXT: lbu a1, 43(sp)
520- ; RV64-NEXT: lbu a2, 45(sp)
521- ; RV64-NEXT: lbu a3, 47(sp)
522- ; RV64-NEXT: vslide1down.vx v8, v8, a0
523- ; RV64-NEXT: vslide1down.vx v8, v8, a1
524- ; RV64-NEXT: vslide1down.vx v8, v8, a2
525- ; RV64-NEXT: vslide1down.vx v8, v8, a3
526- ; RV64-NEXT: lbu a0, 49(sp)
527- ; RV64-NEXT: lbu a1, 51(sp)
528- ; RV64-NEXT: lbu a2, 53(sp)
529- ; RV64-NEXT: lbu a3, 55(sp)
530- ; RV64-NEXT: vslide1down.vx v8, v8, a0
531- ; RV64-NEXT: vslide1down.vx v8, v8, a1
532- ; RV64-NEXT: vslide1down.vx v8, v8, a2
533- ; RV64-NEXT: vslide1down.vx v8, v8, a3
534- ; RV64-NEXT: lbu a0, 57(sp)
535- ; RV64-NEXT: lbu a1, 59(sp)
536- ; RV64-NEXT: lbu a2, 61(sp)
537- ; RV64-NEXT: lbu a3, 63(sp)
538- ; RV64-NEXT: vslide1down.vx v8, v8, a0
539- ; RV64-NEXT: vslide1down.vx v8, v8, a1
540- ; RV64-NEXT: vslide1down.vx v8, v8, a2
541- ; RV64-NEXT: vslide1down.vx v8, v8, a3
542- ; RV64-NEXT: addi sp, s0, -128
543- ; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
544- ; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
545- ; RV64-NEXT: addi sp, sp, 128
546- ; RV64-NEXT: ret
311+ ; CHECK-LABEL: vnsrl_v32i8_v64i8:
312+ ; CHECK: # %bb.0:
313+ ; CHECK-NEXT: li a0, 32
314+ ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
315+ ; CHECK-NEXT: vnsrl.wi v12, v8, 8
316+ ; CHECK-NEXT: vmv.v.v v8, v12
317+ ; CHECK-NEXT: ret
547318 %res = shufflevector <64 x i8 > %in , <64 x i8 > poison, <32 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 , i32 9 , i32 11 , i32 13 , i32 15 , i32 17 , i32 19 , i32 21 , i32 23 , i32 25 , i32 27 , i32 29 , i32 31 , i32 33 , i32 35 , i32 37 , i32 39 , i32 41 , i32 43 , i32 45 , i32 47 , i32 49 , i32 51 , i32 53 , i32 55 , i32 57 , i32 59 , i32 61 , i32 63 >
548319 ret <32 x i8 > %res
549320}