@@ -281,6 +281,121 @@ define <4 x i32> @vclsQs32(ptr %A) nounwind {
281281 ret <4 x i32 > %tmp2
282282}
283283
; ctpop8: population count of an i8 argument, returned zero-extended to i32.
; The IR calls @llvm.ctpop.i8 and widens the i8 result with zext. The expected
; output below contains no call; it is an inline bit-parallel reduction:
;   x -= (x >> 1) & 85                 ; 85 = 0x55, leaves 2-bit field sums
;   x  = (x & 51) + ((x >> 2) & 51)    ; 51 = 0x33, leaves 4-bit field sums
;   x  = (x + (x >> 4)) & 15           ; total ends up in the low nibble
; followed by a `mov pc, lr` return.
; NOTE(review): the RUN line that selects the target/features is outside this
; excerpt, and the expected lines look autogenerated - confirm before hand-editing.
284+ define i32 @ctpop8 (i8 %x ) nounwind readnone {
285+ ; CHECK-LABEL: ctpop8:
286+ ; CHECK: @ %bb.0:
287+ ; CHECK-NEXT: mov r1, #85
288+ ; CHECK-NEXT: and r1, r1, r0, lsr #1
289+ ; CHECK-NEXT: sub r0, r0, r1
290+ ; CHECK-NEXT: mov r1, #51
291+ ; CHECK-NEXT: and r1, r1, r0, lsr #2
292+ ; CHECK-NEXT: and r0, r0, #51
293+ ; CHECK-NEXT: add r0, r0, r1
294+ ; CHECK-NEXT: add r0, r0, r0, lsr #4
295+ ; CHECK-NEXT: and r0, r0, #15
296+ ; CHECK-NEXT: mov pc, lr
297+ %count = tail call i8 @llvm.ctpop.i8 (i8 %x )
298+ %conv = zext i8 %count to i32
299+ ret i32 %conv
300+ }
301+
; ctpop16: population count of an i16 argument, returned zero-extended to i32.
; The IR calls @llvm.ctpop.i16 and widens the i16 result with zext. The expected
; output is the same inline bit-parallel reduction widened to 16 bits:
;   - each 16-bit mask is assembled in two instructions:
;       mov #85 + orr #21760  -> 0x5555
;       mov #51 + orr #13056  -> 0x3333
;   - after `add r0, r0, r0, lsr #4` the two per-byte counts are picked out with
;     #15 (bits 3:0) and #3840 (0xF00, bits 11:8) and summed via `lsr #8`.
; NOTE(review): the RUN line that selects the target/features is outside this
; excerpt, and the expected lines look autogenerated - confirm before hand-editing.
302+ define i32 @ctpop16 (i16 %x ) nounwind readnone {
303+ ; CHECK-LABEL: ctpop16:
304+ ; CHECK: @ %bb.0:
305+ ; CHECK-NEXT: mov r1, #85
306+ ; CHECK-NEXT: orr r1, r1, #21760
307+ ; CHECK-NEXT: and r1, r1, r0, lsr #1
308+ ; CHECK-NEXT: sub r0, r0, r1
309+ ; CHECK-NEXT: mov r1, #51
310+ ; CHECK-NEXT: orr r1, r1, #13056
311+ ; CHECK-NEXT: and r2, r0, r1
312+ ; CHECK-NEXT: and r0, r1, r0, lsr #2
313+ ; CHECK-NEXT: add r0, r2, r0
314+ ; CHECK-NEXT: add r0, r0, r0, lsr #4
315+ ; CHECK-NEXT: and r1, r0, #3840
316+ ; CHECK-NEXT: and r0, r0, #15
317+ ; CHECK-NEXT: add r0, r0, r1, lsr #8
318+ ; CHECK-NEXT: mov pc, lr
319+ %count = tail call i16 @llvm.ctpop.i16 (i16 %x )
320+ %conv = zext i16 %count to i32
321+ ret i32 %conv
322+ }
323+
; ctpop32: population count of an i32 argument, returned directly as i32.
; The IR is a single call to @llvm.ctpop.i32. The expected output loads four
; 32-bit constants from the literal pool that follows the function body:
;   .LCPI22_0 = 0x55555555   mask for the 2-bit field sums
;   .LCPI22_3 = 0x33333333   mask for the 4-bit field sums
;   .LCPI22_1 = 0x0f0f0f0f   mask applied after `add r0, r0, r0, lsr #4`
;   .LCPI22_2 = 0x01010101   multiplier for the final reduction
; The last step multiplies by 0x01010101 and shifts right by 24 to produce the
; value returned in r0.
; NOTE(review): the `22` in the .LCPI labels appears in the expected text, so it
; presumably tracks this function's position in the file - confirm before
; inserting or removing functions above it.
324+ define i32 @ctpop32 (i32 %x ) nounwind readnone {
325+ ; CHECK-LABEL: ctpop32:
326+ ; CHECK: @ %bb.0:
327+ ; CHECK-NEXT: ldr r1, .LCPI22_0
328+ ; CHECK-NEXT: ldr r2, .LCPI22_3
329+ ; CHECK-NEXT: and r1, r1, r0, lsr #1
330+ ; CHECK-NEXT: ldr r12, .LCPI22_1
331+ ; CHECK-NEXT: sub r0, r0, r1
332+ ; CHECK-NEXT: ldr r3, .LCPI22_2
333+ ; CHECK-NEXT: and r1, r0, r2
334+ ; CHECK-NEXT: and r0, r2, r0, lsr #2
335+ ; CHECK-NEXT: add r0, r1, r0
336+ ; CHECK-NEXT: add r0, r0, r0, lsr #4
337+ ; CHECK-NEXT: and r0, r0, r12
338+ ; CHECK-NEXT: mul r1, r0, r3
339+ ; CHECK-NEXT: lsr r0, r1, #24
340+ ; CHECK-NEXT: mov pc, lr
341+ ; CHECK-NEXT: .p2align 2
342+ ; CHECK-NEXT: @ %bb.1:
343+ ; CHECK-NEXT: .LCPI22_0:
344+ ; CHECK-NEXT: .long 1431655765 @ 0x55555555
345+ ; CHECK-NEXT: .LCPI22_1:
346+ ; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
347+ ; CHECK-NEXT: .LCPI22_2:
348+ ; CHECK-NEXT: .long 16843009 @ 0x1010101
349+ ; CHECK-NEXT: .LCPI22_3:
350+ ; CHECK-NEXT: .long 858993459 @ 0x33333333
351+ %count = tail call i32 @llvm.ctpop.i32 (i32 %x )
352+ ret i32 %count
353+ }
354+
; ctpop64: population count of an i64 argument, truncated to i32 for the return.
; The IR calls @llvm.ctpop.i64 and truncates the i64 result. In the expected
; output the 64-bit value is handled as two 32-bit registers (r0 and r1): the
; same mask/shift/add steps are applied to each, interleaved, using the shared
; literal-pool constants below:
;   .LCPI23_0 = 0x55555555, .LCPI23_3 = 0x33333333,
;   .LCPI23_1 = 0x0f0f0f0f, .LCPI23_2 = 0x01010101
; Each register gets its own multiply by 0x01010101; the two products are
; shifted right by 24 and added to form the result in r0.
; r4 and lr are used as extra scratch registers here, which is why the sequence
; starts with `push {r4, lr}` and restores them with `pop {r4, lr}` before the
; `mov pc, lr` return.
; NOTE(review): the `23` in the .LCPI labels appears in the expected text, so it
; presumably tracks this function's position in the file - confirm before
; inserting or removing functions above it.
355+ define i32 @ctpop64 (i64 %x ) nounwind readnone {
356+ ; CHECK-LABEL: ctpop64:
357+ ; CHECK: @ %bb.0:
358+ ; CHECK-NEXT: .save {r4, lr}
359+ ; CHECK-NEXT: push {r4, lr}
360+ ; CHECK-NEXT: ldr r2, .LCPI23_0
361+ ; CHECK-NEXT: ldr r3, .LCPI23_3
362+ ; CHECK-NEXT: and r4, r2, r0, lsr #1
363+ ; CHECK-NEXT: and r2, r2, r1, lsr #1
364+ ; CHECK-NEXT: sub r0, r0, r4
365+ ; CHECK-NEXT: sub r1, r1, r2
366+ ; CHECK-NEXT: and r4, r0, r3
367+ ; CHECK-NEXT: and r2, r1, r3
368+ ; CHECK-NEXT: and r0, r3, r0, lsr #2
369+ ; CHECK-NEXT: and r1, r3, r1, lsr #2
370+ ; CHECK-NEXT: add r0, r4, r0
371+ ; CHECK-NEXT: ldr lr, .LCPI23_1
372+ ; CHECK-NEXT: add r1, r2, r1
373+ ; CHECK-NEXT: ldr r12, .LCPI23_2
374+ ; CHECK-NEXT: add r0, r0, r0, lsr #4
375+ ; CHECK-NEXT: and r0, r0, lr
376+ ; CHECK-NEXT: add r1, r1, r1, lsr #4
377+ ; CHECK-NEXT: mul r2, r0, r12
378+ ; CHECK-NEXT: and r0, r1, lr
379+ ; CHECK-NEXT: mul r1, r0, r12
380+ ; CHECK-NEXT: lsr r0, r2, #24
381+ ; CHECK-NEXT: add r0, r0, r1, lsr #24
382+ ; CHECK-NEXT: pop {r4, lr}
383+ ; CHECK-NEXT: mov pc, lr
384+ ; CHECK-NEXT: .p2align 2
385+ ; CHECK-NEXT: @ %bb.1:
386+ ; CHECK-NEXT: .LCPI23_0:
387+ ; CHECK-NEXT: .long 1431655765 @ 0x55555555
388+ ; CHECK-NEXT: .LCPI23_1:
389+ ; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
390+ ; CHECK-NEXT: .LCPI23_2:
391+ ; CHECK-NEXT: .long 16843009 @ 0x1010101
392+ ; CHECK-NEXT: .LCPI23_3:
393+ ; CHECK-NEXT: .long 858993459 @ 0x33333333
394+ %count = tail call i64 @llvm.ctpop.i64 (i64 %x )
395+ %conv = trunc i64 %count to i32
396+ ret i32 %conv
397+ }
398+
284399define i32 @ctpop_eq_one (i64 %x ) nounwind readnone {
285400; CHECK-LABEL: ctpop_eq_one:
286401; CHECK: @ %bb.0:
@@ -299,6 +414,9 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
299414 ret i32 %conv
300415}
301416
; Population-count intrinsic declarations, one per integer width called by the
; ctpop8 / ctpop16 / ctpop32 / ctpop64 tests in this file.
417+ declare i8 @llvm.ctpop.i8 (i8 ) nounwind readnone
418+ declare i16 @llvm.ctpop.i16 (i16 ) nounwind readnone
419+ declare i32 @llvm.ctpop.i32 (i32 ) nounwind readnone
302420declare i64 @llvm.ctpop.i64 (i64 ) nounwind readnone
303421
; NEON vcls intrinsic declaration for <8 x i8>; no caller of this overload is
; visible in this excerpt.
304422declare <8 x i8 > @llvm.arm.neon.vcls.v8i8 (<8 x i8 >) nounwind readnone
0 commit comments