@@ -493,3 +493,154 @@ entry:
493493 ret i128 %arg1
494494}
495495
; Add-reduction of a <4 x i16> vector built from a 64-bit load masked with 255.
; The mask fits inside a single 16-bit lane, so at most one lane is nonzero and
; the sum of all lanes equals the masked value itself.
; The SD-prefixed expectations contain only a byte load and a return; the
; GI-prefixed expectations still contain the vector addv.
; NOTE(review): the SD/GI prefixes presumably map to SelectionDAG and GlobalISel
; RUN lines, which are outside this view - confirm.
496+ define i16 @addv_zero_lanes_v4i16 (ptr %arr ) {
497+ ; CHECK-SD-LABEL: addv_zero_lanes_v4i16:
498+ ; CHECK-SD: // %bb.0:
499+ ; CHECK-SD-NEXT: ldrb w0, [x0]
500+ ; CHECK-SD-NEXT: ret
501+ ;
502+ ; CHECK-GI-LABEL: addv_zero_lanes_v4i16:
503+ ; CHECK-GI: // %bb.0:
504+ ; CHECK-GI-NEXT: ldrb w8, [x0]
505+ ; CHECK-GI-NEXT: fmov d0, x8
506+ ; CHECK-GI-NEXT: addv h0, v0.4h
507+ ; CHECK-GI-NEXT: fmov w0, s0
508+ ; CHECK-GI-NEXT: ret
509+ %v = load i64 , ptr %arr ; 64-bit scalar load
510+ %and = and i64 %v , 255 ; keep only the low 8 bits
511+ %vec = bitcast i64 %and to <4 x i16 > ; reinterpret the masked scalar as four 16-bit lanes
512+ %r = call i16 @llvm.vector.reduce.add.v4i16 (<4 x i16 > %vec ) ; sum of all four lanes
513+ ret i16 %r
514+ }
515+
; Add-reduction of an <8 x i8> vector built from a 64-bit load masked with 255.
; The mask covers exactly the low 8 bits, so only one byte lane can be nonzero
; and the reduction result equals that byte.
; The SD-prefixed expectations contain only a byte load and a return; the
; GI-prefixed expectations still contain the vector addv.
516+ define i8 @addv_zero_lanes_v8i8 (ptr %arr ) {
517+ ; CHECK-SD-LABEL: addv_zero_lanes_v8i8:
518+ ; CHECK-SD: // %bb.0:
519+ ; CHECK-SD-NEXT: ldrb w0, [x0]
520+ ; CHECK-SD-NEXT: ret
521+ ;
522+ ; CHECK-GI-LABEL: addv_zero_lanes_v8i8:
523+ ; CHECK-GI: // %bb.0:
524+ ; CHECK-GI-NEXT: ldrb w8, [x0]
525+ ; CHECK-GI-NEXT: fmov d0, x8
526+ ; CHECK-GI-NEXT: addv b0, v0.8b
527+ ; CHECK-GI-NEXT: fmov w0, s0
528+ ; CHECK-GI-NEXT: ret
529+ %v = load i64 , ptr %arr ; 64-bit scalar load
530+ %and = and i64 %v , 255 ; keep only the low 8 bits
531+ %vec = bitcast i64 %and to <8 x i8 > ; reinterpret the masked scalar as eight byte lanes
532+ %r = call i8 @llvm.vector.reduce.add.v8i8 (<8 x i8 > %vec ) ; sum of all eight lanes
533+ ret i8 %r
534+ }
535+
; Negative counterpart of addv_zero_lanes_v8i8: the mask is 256 (bit 8 only),
; so the bit that survives the mask lies outside the low 8 bits of the scalar.
; The expectations here use the common prefix and keep the full sequence:
; 64-bit load, and with #0x100, move to a vector register, addv, move back.
536+ define i8 @addv_zero_lanes_negative_v8i8 (ptr %arr ) {
537+ ; CHECK-LABEL: addv_zero_lanes_negative_v8i8:
538+ ; CHECK: // %bb.0:
539+ ; CHECK-NEXT: ldr x8, [x0]
540+ ; CHECK-NEXT: and x8, x8, #0x100
541+ ; CHECK-NEXT: fmov d0, x8
542+ ; CHECK-NEXT: addv b0, v0.8b
543+ ; CHECK-NEXT: fmov w0, s0
544+ ; CHECK-NEXT: ret
545+ %v = load i64 , ptr %arr ; 64-bit scalar load
546+ %and = and i64 %v , 256 ; keep only bit 8 (0x100), which is not in the low byte
547+ %vec = bitcast i64 %and to <8 x i8 > ; reinterpret the masked scalar as eight byte lanes
548+ %r = call i8 @llvm.vector.reduce.add.v8i8 (<8 x i8 > %vec ) ; sum of all eight lanes
549+ ret i8 %r
550+ }
551+
552+
; 128-bit variant: add-reduction of a <16 x i8> vector built from an i128 load
; masked with 255, so only one byte lane can be nonzero.
; Both sets of expectations narrow the load to ldrb but still contain an addv
; over the full 16-byte vector; they differ only in how the remaining vector
; bits are zeroed (movi before the insert vs. inserting xzr into d[1]).
553+ define i8 @addv_zero_lanes_v16i8 (ptr %arr ) {
554+ ; CHECK-SD-LABEL: addv_zero_lanes_v16i8:
555+ ; CHECK-SD: // %bb.0:
556+ ; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
557+ ; CHECK-SD-NEXT: ldrb w8, [x0]
558+ ; CHECK-SD-NEXT: mov v0.d[0], x8
559+ ; CHECK-SD-NEXT: addv b0, v0.16b
560+ ; CHECK-SD-NEXT: fmov w0, s0
561+ ; CHECK-SD-NEXT: ret
562+ ;
563+ ; CHECK-GI-LABEL: addv_zero_lanes_v16i8:
564+ ; CHECK-GI: // %bb.0:
565+ ; CHECK-GI-NEXT: ldrb w8, [x0]
566+ ; CHECK-GI-NEXT: mov v0.d[0], x8
567+ ; CHECK-GI-NEXT: mov v0.d[1], xzr
568+ ; CHECK-GI-NEXT: addv b0, v0.16b
569+ ; CHECK-GI-NEXT: fmov w0, s0
570+ ; CHECK-GI-NEXT: ret
571+ %v = load i128 , ptr %arr ; 128-bit scalar load
572+ %and = and i128 %v , 255 ; keep only the low 8 bits
573+ %vec = bitcast i128 %and to <16 x i8 > ; reinterpret the masked scalar as sixteen byte lanes
574+ %r = call i8 @llvm.vector.reduce.add.v16i8 (<16 x i8 > %vec ) ; sum of all sixteen lanes
575+ ret i8 %r
576+ }
577+
; 128-bit variant: add-reduction of an <8 x i16> vector built from an i128 load
; masked with 0xFFFF, so only one 16-bit lane can be nonzero.
; Both sets of expectations narrow the load to ldrh but still contain an addv
; over the full eight-lane vector.
578+ define i16 @addv_zero_lanes_v8i16 (ptr %arr ) {
579+ ; CHECK-SD-LABEL: addv_zero_lanes_v8i16:
580+ ; CHECK-SD: // %bb.0:
581+ ; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
582+ ; CHECK-SD-NEXT: ldrh w8, [x0]
583+ ; CHECK-SD-NEXT: mov v0.d[0], x8
584+ ; CHECK-SD-NEXT: addv h0, v0.8h
585+ ; CHECK-SD-NEXT: fmov w0, s0
586+ ; CHECK-SD-NEXT: ret
587+ ;
588+ ; CHECK-GI-LABEL: addv_zero_lanes_v8i16:
589+ ; CHECK-GI: // %bb.0:
590+ ; CHECK-GI-NEXT: ldrh w8, [x0]
591+ ; CHECK-GI-NEXT: mov v0.d[0], x8
592+ ; CHECK-GI-NEXT: mov v0.d[1], xzr
593+ ; CHECK-GI-NEXT: addv h0, v0.8h
594+ ; CHECK-GI-NEXT: fmov w0, s0
595+ ; CHECK-GI-NEXT: ret
596+ %v = load i128 , ptr %arr ; 128-bit scalar load
597+ %and = and i128 %v , u0xFFFF ; keep only the low 16 bits
598+ %vec = bitcast i128 %and to <8 x i16 > ; reinterpret the masked scalar as eight 16-bit lanes
599+ %r = call i16 @llvm.vector.reduce.add.v8i16 (<8 x i16 > %vec ) ; sum of all eight lanes
600+ ret i16 %r
601+ }
602+
; 128-bit variant: add-reduction of a <4 x i32> vector built from an i128 load
; masked with 0xFFFFFFFF, so only one 32-bit lane can be nonzero.
; Both sets of expectations narrow the load to a 32-bit ldr but still contain
; an addv over the full four-lane vector.
603+ define i32 @addv_zero_lanes_v4i32 (ptr %arr ) {
604+ ; CHECK-SD-LABEL: addv_zero_lanes_v4i32:
605+ ; CHECK-SD: // %bb.0:
606+ ; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
607+ ; CHECK-SD-NEXT: ldr w8, [x0]
608+ ; CHECK-SD-NEXT: mov v0.d[0], x8
609+ ; CHECK-SD-NEXT: addv s0, v0.4s
610+ ; CHECK-SD-NEXT: fmov w0, s0
611+ ; CHECK-SD-NEXT: ret
612+ ;
613+ ; CHECK-GI-LABEL: addv_zero_lanes_v4i32:
614+ ; CHECK-GI: // %bb.0:
615+ ; CHECK-GI-NEXT: ldr w8, [x0]
616+ ; CHECK-GI-NEXT: mov v0.d[0], x8
617+ ; CHECK-GI-NEXT: mov v0.d[1], xzr
618+ ; CHECK-GI-NEXT: addv s0, v0.4s
619+ ; CHECK-GI-NEXT: fmov w0, s0
620+ ; CHECK-GI-NEXT: ret
621+ %v = load i128 , ptr %arr ; 128-bit scalar load
622+ %and = and i128 %v , u0xFFFFFFFF ; keep only the low 32 bits
623+ %vec = bitcast i128 %and to <4 x i32 > ; reinterpret the masked scalar as four 32-bit lanes
624+ %r = call i32 @llvm.vector.reduce.add.v4i32 (<4 x i32 > %vec ) ; sum of all four lanes
625+ ret i32 %r
626+ }
627+
; Add-reduction of a <2 x i32> vector built from a 64-bit load masked with
; 0xFFFFFFFF, so only one of the two 32-bit lanes can be nonzero and the sum
; equals the masked value.
; The SD-prefixed expectations contain only a 32-bit load and a return; the
; GI-prefixed expectations still contain a pairwise add (addp).
628+ define i32 @addv_zero_lanes_v2i32 (ptr %arr ) {
629+ ; CHECK-SD-LABEL: addv_zero_lanes_v2i32:
630+ ; CHECK-SD: // %bb.0:
631+ ; CHECK-SD-NEXT: ldr w0, [x0]
632+ ; CHECK-SD-NEXT: ret
633+ ;
634+ ; CHECK-GI-LABEL: addv_zero_lanes_v2i32:
635+ ; CHECK-GI: // %bb.0:
636+ ; CHECK-GI-NEXT: ldr w8, [x0]
637+ ; CHECK-GI-NEXT: fmov d0, x8
638+ ; CHECK-GI-NEXT: addp v0.2s, v0.2s, v0.2s
639+ ; CHECK-GI-NEXT: fmov w0, s0
640+ ; CHECK-GI-NEXT: ret
641+ %v = load i64 , ptr %arr ; 64-bit scalar load
642+ %and = and i64 %v , u0xFFFFFFFF ; keep only the low 32 bits
643+ %vec = bitcast i64 %and to <2 x i32 > ; reinterpret the masked scalar as two 32-bit lanes
644+ %r = call i32 @llvm.vector.reduce.add.v2i32 (<2 x i32 > %vec ) ; sum of both lanes
645+ ret i32 %r
646+ }
0 commit comments