@@ -648,4 +648,113 @@ Entry:
648648 ret <4 x i16 > %1
649649}
650650
; Test: population count of one lane of a loaded <2 x i32>, with the count
; re-inserted into a second vector that is subtracted from the loaded value.
;
; IR summary: loads <2 x i32> from %p, extracts lane 0, applies
; @llvm.ctpop.i32 to it, inserts the count into lane 1 of
; <i32 -1, i32 poison>, subtracts that vector from the loaded one, stores the
; difference back to %p and returns 0.
;
; The expected-output blocks below use five FileCheck prefixes (CHECKO0,
; CHECK, BE, GISEL and GISELO0). The RUN lines that define these prefixes are
; not visible in this excerpt; presumably they select -O0, big-endian and
; GlobalISel configurations -- confirm against the top of the file.
; Differences visible in the expected output itself: the BE block loads and
; stores with ld1/st1 where the other four use ldr/str of d0, and the CHECK
; and BE blocks reduce the result of cnt with "addv b1" while the other three
; use "uaddlv h1".
651+ define i32 @ctpop_into_extract (ptr %p ) {
652+ ; CHECKO0-LABEL: ctpop_into_extract:
653+ ; CHECKO0: // %bb.0:
654+ ; CHECKO0-NEXT: mov w8, #-1 // =0xffffffff
655+ ; CHECKO0-NEXT: // implicit-def: $d1
656+ ; CHECKO0-NEXT: // implicit-def: $q0
657+ ; CHECKO0-NEXT: fmov d0, d1
658+ ; CHECKO0-NEXT: mov v0.s[0], w8
659+ ; CHECKO0-NEXT: fmov d2, d0
660+ ; CHECKO0-NEXT: ldr d0, [x0]
661+ ; CHECKO0-NEXT: fmov s1, s0
662+ ; CHECKO0-NEXT: fmov w8, s1
663+ ; CHECKO0-NEXT: fmov s1, w8
664+ ; CHECKO0-NEXT: // kill: def $d1 killed $s1
665+ ; CHECKO0-NEXT: cnt v1.8b, v1.8b
666+ ; CHECKO0-NEXT: uaddlv h1, v1.8b
667+ ; CHECKO0-NEXT: // kill: def $q1 killed $h1
668+ ; CHECKO0-NEXT: // kill: def $s1 killed $s1 killed $q1
669+ ; CHECKO0-NEXT: fmov w8, s1
670+ ; CHECKO0-NEXT: // implicit-def: $q1
671+ ; CHECKO0-NEXT: fmov d1, d2
672+ ; CHECKO0-NEXT: mov v1.s[1], w8
673+ ; CHECKO0-NEXT: // kill: def $d1 killed $d1 killed $q1
674+ ; CHECKO0-NEXT: sub v0.2s, v0.2s, v1.2s
675+ ; CHECKO0-NEXT: str d0, [x0]
676+ ; CHECKO0-NEXT: mov w0, wzr
677+ ; CHECKO0-NEXT: ret
678+ ;
679+ ; CHECK-LABEL: ctpop_into_extract:
680+ ; CHECK: // %bb.0:
681+ ; CHECK-NEXT: ldr d0, [x0]
682+ ; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff
683+ ; CHECK-NEXT: mov x8, x0
684+ ; CHECK-NEXT: mov w0, wzr
685+ ; CHECK-NEXT: fmov w9, s0
686+ ; CHECK-NEXT: fmov s1, w9
687+ ; CHECK-NEXT: cnt v1.8b, v1.8b
688+ ; CHECK-NEXT: addv b1, v1.8b
689+ ; CHECK-NEXT: mov v2.s[1], v1.s[0]
690+ ; CHECK-NEXT: sub v0.2s, v0.2s, v2.2s
691+ ; CHECK-NEXT: str d0, [x8]
692+ ; CHECK-NEXT: ret
693+ ;
694+ ; BE-LABEL: ctpop_into_extract:
695+ ; BE: // %bb.0:
696+ ; BE-NEXT: ld1 { v0.2s }, [x0]
697+ ; BE-NEXT: movi v2.2d, #0xffffffffffffffff
698+ ; BE-NEXT: mov x8, x0
699+ ; BE-NEXT: mov w0, wzr
700+ ; BE-NEXT: fmov w9, s0
701+ ; BE-NEXT: fmov s1, w9
702+ ; BE-NEXT: cnt v1.8b, v1.8b
703+ ; BE-NEXT: addv b1, v1.8b
704+ ; BE-NEXT: mov v2.s[1], v1.s[0]
705+ ; BE-NEXT: sub v0.2s, v0.2s, v2.2s
706+ ; BE-NEXT: st1 { v0.2s }, [x8]
707+ ; BE-NEXT: ret
708+ ;
709+ ; GISEL-LABEL: ctpop_into_extract:
710+ ; GISEL: // %bb.0:
711+ ; GISEL-NEXT: ldr d0, [x0]
712+ ; GISEL-NEXT: mov w9, #-1 // =0xffffffff
713+ ; GISEL-NEXT: mov x8, x0
714+ ; GISEL-NEXT: mov v2.s[0], w9
715+ ; GISEL-NEXT: mov w0, wzr
716+ ; GISEL-NEXT: fmov w10, s0
717+ ; GISEL-NEXT: fmov s1, w10
718+ ; GISEL-NEXT: cnt v1.8b, v1.8b
719+ ; GISEL-NEXT: uaddlv h1, v1.8b
720+ ; GISEL-NEXT: mov v2.s[1], v1.s[0]
721+ ; GISEL-NEXT: sub v0.2s, v0.2s, v2.2s
722+ ; GISEL-NEXT: str d0, [x8]
723+ ; GISEL-NEXT: ret
724+ ;
725+ ; GISELO0-LABEL: ctpop_into_extract:
726+ ; GISELO0: // %bb.0:
727+ ; GISELO0-NEXT: mov w8, #-1 // =0xffffffff
728+ ; GISELO0-NEXT: // implicit-def: $d1
729+ ; GISELO0-NEXT: // implicit-def: $q0
730+ ; GISELO0-NEXT: fmov d0, d1
731+ ; GISELO0-NEXT: mov v0.s[0], w8
732+ ; GISELO0-NEXT: fmov d2, d0
733+ ; GISELO0-NEXT: ldr d0, [x0]
734+ ; GISELO0-NEXT: fmov s1, s0
735+ ; GISELO0-NEXT: fmov w8, s1
736+ ; GISELO0-NEXT: fmov s1, w8
737+ ; GISELO0-NEXT: // kill: def $d1 killed $s1
738+ ; GISELO0-NEXT: cnt v1.8b, v1.8b
739+ ; GISELO0-NEXT: uaddlv h1, v1.8b
740+ ; GISELO0-NEXT: // kill: def $q1 killed $h1
741+ ; GISELO0-NEXT: // kill: def $s1 killed $s1 killed $q1
742+ ; GISELO0-NEXT: fmov w8, s1
743+ ; GISELO0-NEXT: // implicit-def: $q1
744+ ; GISELO0-NEXT: fmov d1, d2
745+ ; GISELO0-NEXT: mov v1.s[1], w8
746+ ; GISELO0-NEXT: // kill: def $d1 killed $d1 killed $q1
747+ ; GISELO0-NEXT: sub v0.2s, v0.2s, v1.2s
748+ ; GISELO0-NEXT: str d0, [x0]
749+ ; GISELO0-NEXT: mov w0, wzr
750+ ; GISELO0-NEXT: ret
; Both lanes are loaded, but only lane 0 feeds the population count.
751+ %1 = load <2 x i32 >, ptr %p , align 4
752+ %2 = extractelement <2 x i32 > %1 , i64 0
753+ %3 = call i32 @llvm.ctpop.i32 (i32 %2 )
; Lane 0 of the constant is -1; its poison lane 1 is overwritten by the count.
754+ %4 = insertelement <2 x i32 > <i32 -1 , i32 poison>, i32 %3 , i64 1
; Result lanes: [0] = %1[0] - (-1), [1] = %1[1] - ctpop(%1[0]).
755+ %5 = sub <2 x i32 > %1 , %4
756+ store <2 x i32 > %5 , ptr %p , align 4
757+ ret i32 0
758+ }
759+
651760declare <4 x i16 > @llvm.ctpop.v4i16 (<4 x i16 >)
0 commit comments