@@ -493,3 +493,154 @@ entry:
493
493
ret i128 %arg1
494
494
}
495
495
496
; The i64 is masked with 255 before the bitcast, so only the low byte of
; lane 0 of the <4 x i16> can be non-zero.  CHECK-SD shows SelectionDAG
; folding the whole reduction to a single scalar byte load; CHECK-GI shows
; GlobalISel still emitting the addv sequence.
define i16 @addv_zero_lanes_v4i16(ptr %arr) {
; CHECK-SD-LABEL: addv_zero_lanes_v4i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldrb w0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: addv_zero_lanes_v4i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrb w8, [x0]
; CHECK-GI-NEXT:    fmov d0, x8
; CHECK-GI-NEXT:    addv h0, v0.4h
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %v = load i64, ptr %arr
  %and = and i64 %v, 255
  %vec = bitcast i64 %and to <4 x i16>
  %r = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %vec)
  ret i16 %r
}
515
+
516
; Mask 255 keeps only byte 0 of the loaded i64, so every lane of the
; bitcast <8 x i8> except lane 0 is known zero.  CHECK-SD folds the
; reduction to a plain ldrb; CHECK-GI keeps the vector addv.
define i8 @addv_zero_lanes_v8i8(ptr %arr) {
; CHECK-SD-LABEL: addv_zero_lanes_v8i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldrb w0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: addv_zero_lanes_v8i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrb w8, [x0]
; CHECK-GI-NEXT:    fmov d0, x8
; CHECK-GI-NEXT:    addv b0, v0.8b
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %v = load i64, ptr %arr
  %and = and i64 %v, 255
  %vec = bitcast i64 %and to <8 x i8>
  %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %vec)
  ret i8 %r
}
535
+
536
; Negative test: mask 256 (bit 8) lands in byte 1, i.e. lane 1 of the
; <8 x i8>, so lane 0 is not the only potentially non-zero lane and the
; load-fold must NOT fire.  Both selectors keep the full addv sequence
; (shared CHECK prefix).
define i8 @addv_zero_lanes_negative_v8i8(ptr %arr) {
; CHECK-LABEL: addv_zero_lanes_negative_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    and x8, x8, #0x100
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    addv b0, v0.8b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %v = load i64, ptr %arr
  %and = and i64 %v, 256
  %vec = bitcast i64 %and to <8 x i8>
  %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %vec)
  ret i8 %r
}
551
+
552
+
553
; 128-bit variant: the i128 is masked with 255, so only byte 0 of the
; <16 x i8> can be non-zero.  Neither selector folds to a scalar load here;
; both narrow the load to ldrb, build the vector, and keep the addv.
define i8 @addv_zero_lanes_v16i8(ptr %arr) {
; CHECK-SD-LABEL: addv_zero_lanes_v16i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
; CHECK-SD-NEXT:    ldrb w8, [x0]
; CHECK-SD-NEXT:    mov v0.d[0], x8
; CHECK-SD-NEXT:    addv b0, v0.16b
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: addv_zero_lanes_v16i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrb w8, [x0]
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v0.d[1], xzr
; CHECK-GI-NEXT:    addv b0, v0.16b
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %v = load i128, ptr %arr
  %and = and i128 %v, 255
  %vec = bitcast i128 %and to <16 x i8>
  %r = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %vec)
  ret i8 %r
}
577
+
578
; i128 masked with 0xFFFF: only lane 0 of the <8 x i16> can be non-zero.
; Both selectors narrow the load to ldrh but still materialize the vector
; and reduce with addv (no scalar fold for the 128-bit source).
define i16 @addv_zero_lanes_v8i16(ptr %arr) {
; CHECK-SD-LABEL: addv_zero_lanes_v8i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
; CHECK-SD-NEXT:    ldrh w8, [x0]
; CHECK-SD-NEXT:    mov v0.d[0], x8
; CHECK-SD-NEXT:    addv h0, v0.8h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: addv_zero_lanes_v8i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrh w8, [x0]
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v0.d[1], xzr
; CHECK-GI-NEXT:    addv h0, v0.8h
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %v = load i128, ptr %arr
  %and = and i128 %v, u0xFFFF
  %vec = bitcast i128 %and to <8 x i16>
  %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %vec)
  ret i16 %r
}
602
+
603
; i128 masked with 0xFFFFFFFF: only lane 0 of the <4 x i32> can be
; non-zero.  Both selectors narrow the load to a 32-bit ldr but keep the
; vector addv reduction.
define i32 @addv_zero_lanes_v4i32(ptr %arr) {
; CHECK-SD-LABEL: addv_zero_lanes_v4i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
; CHECK-SD-NEXT:    ldr w8, [x0]
; CHECK-SD-NEXT:    mov v0.d[0], x8
; CHECK-SD-NEXT:    addv s0, v0.4s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: addv_zero_lanes_v4i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr w8, [x0]
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v0.d[1], xzr
; CHECK-GI-NEXT:    addv s0, v0.4s
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %v = load i128, ptr %arr
  %and = and i128 %v, u0xFFFFFFFF
  %vec = bitcast i128 %and to <4 x i32>
  %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vec)
  ret i32 %r
}
627
+
628
; i64 masked with 0xFFFFFFFF: only lane 0 of the <2 x i32> can be
; non-zero.  CHECK-SD folds the whole reduction to a single 32-bit load;
; CHECK-GI keeps the pairwise addp reduction.
define i32 @addv_zero_lanes_v2i32(ptr %arr) {
; CHECK-SD-LABEL: addv_zero_lanes_v2i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr w0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: addv_zero_lanes_v2i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr w8, [x0]
; CHECK-GI-NEXT:    fmov d0, x8
; CHECK-GI-NEXT:    addp v0.2s, v0.2s, v0.2s
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %v = load i64, ptr %arr
  %and = and i64 %v, u0xFFFFFFFF
  %vec = bitcast i64 %and to <2 x i32>
  %r = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %vec)
  ret i32 %r
}
; NOTE(review): removed "0 commit comments" — GitHub web-page residue from the paste, not part of the test file.