Skip to content

Commit 9d215e5

Browse files
authored
reimplements clz using the branchless/loopless algorithm (#1423)
Inspired by Benjamin Mourad (@bmourad01) implementation in #1416. It is not only efficient but also helps with bap-veri-fication, as the latter do not support while loops (which is used in the previous implementation).
1 parent 594c9cc commit 9d215e5

File tree

4 files changed

+67
-13
lines changed

4 files changed

+67
-13
lines changed

lib/arm/arm_lifter.ml

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -335,18 +335,6 @@ let lift_bits mem ops (insn : bits_insn ) =
335335
extract 7 0 s ^
336336
extract 15 8 s) in
337337
exec [assn (Env.of_reg dest) rev] cond
338-
| `CLZ, [|`Reg dest; src; cond; _|] ->
339-
let shift = tmp ~name:"shift" reg32_t in
340-
let accum = tmp ~name:"accum" reg32_t in
341-
Bil.(exec [
342-
shift := exp_of_op src;
343-
accum := int32 32;
344-
while_ (var shift <> int32 0) [
345-
shift := var shift lsr int32 1;
346-
accum := var accum - int32 1;
347-
];
348-
Env.of_reg dest := var accum;
349-
]) cond
350338
| insn,ops ->
351339
fail [%here] "ops %s doesn't match bits insn %s"
352340
(string_of_ops ops) (Arm_insn.to_string (insn :> insn))

lib/arm/arm_types.ml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,6 @@ type bits_insn = [
159159
| `UXTH
160160
| `REV
161161
| `REV16
162-
| `CLZ
163162
] [@@deriving bin_io, compare, sexp, enumerate]
164163

165164
type mult_insn = [

plugins/arm/semantics/arm.lisp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
(declare (context (target arm)))
2+
(defpackage llvm-armv7 (:use arm))
3+
(in-package arm)
4+
5+
(require bits)
6+
7+
(defun CLZ (rd rn pre _)
8+
(when (condition-holds pre)
9+
(set$ rd (clz32 rn))))

plugins/primus_lisp/semantics/bits.lisp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,61 @@
2121
in two's complement overflow."
2222
(logor (logand (msb rn) (msb rm) (lnot (msb rd)))
2323
(logand (lnot (msb rn)) (lnot (msb rm)) (msb rd))))
24+
25+
(defmacro popcount/helper (x sh m1 m2 m4 h01)
26+
(prog
27+
(set x (- x (logand (rshift x 1) m1)))
28+
(set x (+ (logand x m2) (logand (rshift x 2) m2)))
29+
(set x (logand (+ x (rshift x 4)) m4))
30+
(rshift (* x h01) sh)))
31+
32+
(defmacro popcount16 (x)
33+
(popcount/helper x 8
34+
0x5555
35+
0x3333
36+
0x0f0f
37+
0x0101))
38+
39+
(defmacro popcount32 (x)
40+
(popcount/helper x 24
41+
0x55555555
42+
0x33333333
43+
0x0f0f0f0f
44+
0x01010101))
45+
46+
(defmacro popcount64 (x)
47+
(popcount/helper x 56
48+
0x5555555555555555
49+
0x3333333333333333
50+
0x0f0f0f0f0f0f0f0f
51+
0x0101010101010101))
52+
53+
(defun clz16 (r)
54+
(let ((x r))
55+
(set x (logor x (rshift x 1)))
56+
(set x (logor x (rshift x 2)))
57+
(set x (logor x (rshift x 4)))
58+
(set x (logor x (rshift x 8)))
59+
(set x (lnot x))
60+
(popcount16 x)))
61+
62+
(defun clz32 (x)
63+
(let ((x x))
64+
(set x (logor x (rshift x 1)))
65+
(set x (logor x (rshift x 2)))
66+
(set x (logor x (rshift x 4)))
67+
(set x (logor x (rshift x 8)))
68+
(set x (logor x (rshift x 16)))
69+
(set x (lnot x))
70+
(popcount32 x)))
71+
72+
(defun clz64 (x)
73+
(let ((x x))
74+
(set x (logor x (rshift x 1)))
75+
(set x (logor x (rshift x 2)))
76+
(set x (logor x (rshift x 4)))
77+
(set x (logor x (rshift x 8)))
78+
(set x (logor x (rshift x 16)))
79+
(set x (logor x (rshift x 32)))
80+
(set x (lnot x))
81+
(popcount64 x)))

0 commit comments

Comments
 (0)