22; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
33; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
44; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s
5+ ; RUN: llc < %s -mtriple=aarch64_be-none-eabi | FileCheck %s --check-prefix=CHECK-BE
56
67define i32 @cnt32_advsimd (i32 %x ) nounwind readnone {
78; CHECK-LABEL: cnt32_advsimd:
@@ -32,6 +33,14 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
3233; CHECK-CSSC: // %bb.0:
3334; CHECK-CSSC-NEXT: cnt w0, w0
3435; CHECK-CSSC-NEXT: ret
36+ ;
37+ ; CHECK-BE-LABEL: cnt32_advsimd:
38+ ; CHECK-BE: // %bb.0:
39+ ; CHECK-BE-NEXT: fmov s0, w0
40+ ; CHECK-BE-NEXT: cnt v0.8b, v0.8b
41+ ; CHECK-BE-NEXT: addv b0, v0.8b
42+ ; CHECK-BE-NEXT: fmov w0, s0
43+ ; CHECK-BE-NEXT: ret
3544 %cnt = tail call i32 @llvm.ctpop.i32 (i32 %x )
3645 ret i32 %cnt
3746}
@@ -69,6 +78,16 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
6978; CHECK-CSSC-NEXT: fmov w8, s0
7079; CHECK-CSSC-NEXT: cnt w0, w8
7180; CHECK-CSSC-NEXT: ret
81+ ;
82+ ; CHECK-BE-LABEL: cnt32_advsimd_2:
83+ ; CHECK-BE: // %bb.0:
84+ ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s
85+ ; CHECK-BE-NEXT: fmov w8, s0
86+ ; CHECK-BE-NEXT: fmov s0, w8
87+ ; CHECK-BE-NEXT: cnt v0.8b, v0.8b
88+ ; CHECK-BE-NEXT: addv b0, v0.8b
89+ ; CHECK-BE-NEXT: fmov w0, s0
90+ ; CHECK-BE-NEXT: ret
7291 %1 = extractelement <2 x i32 > %x , i64 0
7392 %2 = tail call i32 @llvm.ctpop.i32 (i32 %1 )
7493 ret i32 %2
@@ -103,6 +122,16 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
103122; CHECK-CSSC: // %bb.0:
104123; CHECK-CSSC-NEXT: cnt x0, x0
105124; CHECK-CSSC-NEXT: ret
125+ ;
126+ ; CHECK-BE-LABEL: cnt64_advsimd:
127+ ; CHECK-BE: // %bb.0:
128+ ; CHECK-BE-NEXT: fmov d0, x0
129+ ; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
130+ ; CHECK-BE-NEXT: cnt v0.8b, v0.8b
131+ ; CHECK-BE-NEXT: addv b0, v0.8b
132+ ; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
133+ ; CHECK-BE-NEXT: fmov x0, d0
134+ ; CHECK-BE-NEXT: ret
106135 %cnt = tail call i64 @llvm.ctpop.i64 (i64 %x )
107136 ret i64 %cnt
108137}
@@ -147,6 +176,22 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
147176; CHECK-CSSC: // %bb.0:
148177; CHECK-CSSC-NEXT: cnt w0, w0
149178; CHECK-CSSC-NEXT: ret
179+ ;
180+ ; CHECK-BE-LABEL: cnt32:
181+ ; CHECK-BE: // %bb.0:
182+ ; CHECK-BE-NEXT: lsr w9, w0, #1
183+ ; CHECK-BE-NEXT: mov w8, #16843009 // =0x1010101
184+ ; CHECK-BE-NEXT: and w9, w9, #0x55555555
185+ ; CHECK-BE-NEXT: sub w9, w0, w9
186+ ; CHECK-BE-NEXT: lsr w10, w9, #2
187+ ; CHECK-BE-NEXT: and w9, w9, #0x33333333
188+ ; CHECK-BE-NEXT: and w10, w10, #0x33333333
189+ ; CHECK-BE-NEXT: add w9, w9, w10
190+ ; CHECK-BE-NEXT: add w9, w9, w9, lsr #4
191+ ; CHECK-BE-NEXT: and w9, w9, #0xf0f0f0f
192+ ; CHECK-BE-NEXT: mul w8, w9, w8
193+ ; CHECK-BE-NEXT: lsr w0, w8, #24
194+ ; CHECK-BE-NEXT: ret
150195 %cnt = tail call i32 @llvm.ctpop.i32 (i32 %x )
151196 ret i32 %cnt
152197}
@@ -188,6 +233,22 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
188233; CHECK-CSSC: // %bb.0:
189234; CHECK-CSSC-NEXT: cnt x0, x0
190235; CHECK-CSSC-NEXT: ret
236+ ;
237+ ; CHECK-BE-LABEL: cnt64:
238+ ; CHECK-BE: // %bb.0:
239+ ; CHECK-BE-NEXT: lsr x9, x0, #1
240+ ; CHECK-BE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
241+ ; CHECK-BE-NEXT: and x9, x9, #0x5555555555555555
242+ ; CHECK-BE-NEXT: sub x9, x0, x9
243+ ; CHECK-BE-NEXT: lsr x10, x9, #2
244+ ; CHECK-BE-NEXT: and x9, x9, #0x3333333333333333
245+ ; CHECK-BE-NEXT: and x10, x10, #0x3333333333333333
246+ ; CHECK-BE-NEXT: add x9, x9, x10
247+ ; CHECK-BE-NEXT: add x9, x9, x9, lsr #4
248+ ; CHECK-BE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
249+ ; CHECK-BE-NEXT: mul x8, x9, x8
250+ ; CHECK-BE-NEXT: lsr x0, x8, #56
251+ ; CHECK-BE-NEXT: ret
191252 %cnt = tail call i64 @llvm.ctpop.i64 (i64 %x )
192253 ret i64 %cnt
193254}
@@ -215,6 +276,14 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
215276; CHECK-CSSC-NEXT: cmp x8, #1
216277; CHECK-CSSC-NEXT: cset w0, eq
217278; CHECK-CSSC-NEXT: ret
279+ ;
280+ ; CHECK-BE-LABEL: ctpop_eq_one:
281+ ; CHECK-BE: // %bb.0:
282+ ; CHECK-BE-NEXT: sub x8, x0, #1
283+ ; CHECK-BE-NEXT: eor x9, x0, x8
284+ ; CHECK-BE-NEXT: cmp x9, x8
285+ ; CHECK-BE-NEXT: cset w0, hi
286+ ; CHECK-BE-NEXT: ret
218287 %count = tail call i64 @llvm.ctpop.i64 (i64 %x )
219288 %cmp = icmp eq i64 %count , 1
220289 %conv = zext i1 %cmp to i32
@@ -244,6 +313,14 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
244313; CHECK-CSSC-NEXT: cmp x8, #1
245314; CHECK-CSSC-NEXT: cset w0, ne
246315; CHECK-CSSC-NEXT: ret
316+ ;
317+ ; CHECK-BE-LABEL: ctpop_ne_one:
318+ ; CHECK-BE: // %bb.0:
319+ ; CHECK-BE-NEXT: sub x8, x0, #1
320+ ; CHECK-BE-NEXT: eor x9, x0, x8
321+ ; CHECK-BE-NEXT: cmp x9, x8
322+ ; CHECK-BE-NEXT: cset w0, ls
323+ ; CHECK-BE-NEXT: ret
247324 %count = tail call i64 @llvm.ctpop.i64 (i64 %x )
248325 %cmp = icmp ne i64 %count , 1
249326 %conv = zext i1 %cmp to i32
@@ -273,6 +350,14 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
273350; CHECK-CSSC-NEXT: cmp w8, #1
274351; CHECK-CSSC-NEXT: cset w0, ne
275352; CHECK-CSSC-NEXT: ret
353+ ;
354+ ; CHECK-BE-LABEL: ctpop32_ne_one:
355+ ; CHECK-BE: // %bb.0:
356+ ; CHECK-BE-NEXT: sub w8, w0, #1
357+ ; CHECK-BE-NEXT: eor w9, w0, w8
358+ ; CHECK-BE-NEXT: cmp w9, w8
359+ ; CHECK-BE-NEXT: cset w0, ls
360+ ; CHECK-BE-NEXT: ret
276361 %count = tail call i32 @llvm.ctpop.i32 (i32 %x )
277362 %cmp = icmp ne i32 %count , 1
278363 ret i1 %cmp
@@ -299,6 +384,13 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) {
299384; CHECK-CSSC-NEXT: tst w0, w8
300385; CHECK-CSSC-NEXT: cset w0, eq
301386; CHECK-CSSC-NEXT: ret
387+ ;
388+ ; CHECK-BE-LABEL: ctpop32_eq_one_nonzero:
389+ ; CHECK-BE: // %bb.0: // %entry
390+ ; CHECK-BE-NEXT: sub w8, w0, #1
391+ ; CHECK-BE-NEXT: tst w0, w8
392+ ; CHECK-BE-NEXT: cset w0, eq
393+ ; CHECK-BE-NEXT: ret
302394entry:
303395 %popcnt = call range(i32 1 , 33 ) i32 @llvm.ctpop.i32 (i32 %x )
304396 %cmp = icmp eq i32 %popcnt , 1
@@ -326,11 +418,80 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) {
326418; CHECK-CSSC-NEXT: tst w0, w8
327419; CHECK-CSSC-NEXT: cset w0, ne
328420; CHECK-CSSC-NEXT: ret
421+ ;
422+ ; CHECK-BE-LABEL: ctpop32_ne_one_nonzero:
423+ ; CHECK-BE: // %bb.0: // %entry
424+ ; CHECK-BE-NEXT: sub w8, w0, #1
425+ ; CHECK-BE-NEXT: tst w0, w8
426+ ; CHECK-BE-NEXT: cset w0, ne
427+ ; CHECK-BE-NEXT: ret
329428entry:
330429 %popcnt = tail call range(i32 1 , 33 ) i32 @llvm.ctpop.i32 (i32 %x )
331430 %cmp = icmp ne i32 %popcnt , 1
332431 ret i1 %cmp
333432}
334433
; cnt128: population count of a full i128 argument via @llvm.ctpop.i128.
; The four assertion groups below belong to the four RUN configurations of this
; file: the default NEON run (apple syntax), the run with NEON disabled, the
; run with +cssc, and the big-endian run.
; NOTE(review): in the big-endian sequence a rev64 is applied after addv and
; before the value is moved back to x0/x1, which looks like it would move the
; byte-sized sum out of the low byte. Confirm whether these assertions are
; meant to pin current (possibly incorrect) big-endian output.
define i128 @cnt128(i128 %x) nounwind readnone {
; CHECK-LABEL: cnt128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    mov.d v0[1], x1
; CHECK-NEXT:    cnt.16b v0, v0
; CHECK-NEXT:    addv.16b b0, v0
; CHECK-NEXT:    mov.d x1, v0[1]
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: cnt128:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr x9, x0, #1
; CHECK-NONEON-NEXT:    lsr x10, x1, #1
; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
; CHECK-NONEON-NEXT:    and x10, x10, #0x5555555555555555
; CHECK-NONEON-NEXT:    sub x9, x0, x9
; CHECK-NONEON-NEXT:    sub x10, x1, x10
; CHECK-NONEON-NEXT:    mov x1, xzr
; CHECK-NONEON-NEXT:    lsr x11, x9, #2
; CHECK-NONEON-NEXT:    lsr x12, x10, #2
; CHECK-NONEON-NEXT:    and x9, x9, #0x3333333333333333
; CHECK-NONEON-NEXT:    and x10, x10, #0x3333333333333333
; CHECK-NONEON-NEXT:    and x11, x11, #0x3333333333333333
; CHECK-NONEON-NEXT:    add x9, x9, x11
; CHECK-NONEON-NEXT:    and x11, x12, #0x3333333333333333
; CHECK-NONEON-NEXT:    add x9, x9, x9, lsr #4
; CHECK-NONEON-NEXT:    add x10, x10, x11
; CHECK-NONEON-NEXT:    add x10, x10, x10, lsr #4
; CHECK-NONEON-NEXT:    and x9, x9, #0xf0f0f0f0f0f0f0f
; CHECK-NONEON-NEXT:    mul x9, x9, x8
; CHECK-NONEON-NEXT:    and x10, x10, #0xf0f0f0f0f0f0f0f
; CHECK-NONEON-NEXT:    mul x8, x10, x8
; CHECK-NONEON-NEXT:    lsr x9, x9, #56
; CHECK-NONEON-NEXT:    add x0, x9, x8, lsr #56
; CHECK-NONEON-NEXT:    ret
;
; CHECK-CSSC-LABEL: cnt128:
; CHECK-CSSC:       // %bb.0:
; CHECK-CSSC-NEXT:    cnt x8, x1
; CHECK-CSSC-NEXT:    cnt x9, x0
; CHECK-CSSC-NEXT:    mov x1, xzr
; CHECK-CSSC-NEXT:    add x0, x9, x8
; CHECK-CSSC-NEXT:    ret
;
; CHECK-BE-LABEL: cnt128:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    fmov d0, x0
; CHECK-BE-NEXT:    mov v0.d[1], x1
; CHECK-BE-NEXT:    rev64 v0.16b, v0.16b
; CHECK-BE-NEXT:    cnt v0.16b, v0.16b
; CHECK-BE-NEXT:    addv b0, v0.16b
; CHECK-BE-NEXT:    rev64 v0.16b, v0.16b
; CHECK-BE-NEXT:    mov x1, v0.d[1]
; CHECK-BE-NEXT:    fmov x0, d0
; CHECK-BE-NEXT:    ret
  %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
  ret i128 %cnt
}
495+
335496declare i32 @llvm.ctpop.i32 (i32 ) nounwind readnone
336497declare i64 @llvm.ctpop.i64 (i64 ) nounwind readnone
0 commit comments