|
1 | 1 | // SPDX-License-Identifier: Apache-2.0 |
2 | 2 | // ---------------------------------------------------------------------------- |
3 | | -// Copyright 2011-2021 Arm Limited |
| 3 | +// Copyright 2011-2021,2025 Arm Limited |
4 | 4 | // |
5 | 5 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
6 | 6 | // use this file except in compliance with the License. You may obtain a copy |
@@ -30,69 +30,19 @@ typedef uint16_t sf16; |
30 | 30 | typedef uint32_t sf32; |
31 | 31 |
|
32 | 32 | /****************************************** |
33 | | - helper functions and their lookup tables |
| 33 | + helper functions |
34 | 34 | ******************************************/ |
35 | | -/* count leading zeros functions. Only used when the input is nonzero. */ |
36 | | - |
37 | | -#if defined(__GNUC__) && (defined(__i386) || defined(__amd64)) |
38 | | -#elif defined(__arm__) && defined(__ARMCC_VERSION) |
39 | | -#elif defined(__arm__) && defined(__GNUC__) |
40 | | -#else |
41 | | - /* table used for the slow default versions. */ |
42 | | - static const uint8_t clz_table[256] = |
43 | | - { |
44 | | - 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, |
45 | | - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
46 | | - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
47 | | - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
48 | | - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
49 | | - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
50 | | - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
51 | | - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
52 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
53 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
54 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
55 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
56 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
57 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
58 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
59 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
60 | | - }; |
61 | | -#endif |
62 | 35 |
|
63 | | -/* |
64 | | - 32-bit count-leading-zeros function: use the Assembly instruction whenever possible. */ |
| 36 | +/* Idiomatic count-leading-zeros loop; returns 32 when the input is zero. Generates a native instruction on modern compilers. */ |
65 | 37 | static uint32_t clz32(uint32_t inp) |
66 | 38 | { |
67 | | - #if defined(__GNUC__) && (defined(__i386) || defined(__amd64)) |
68 | | - uint32_t bsr; |
69 | | - __asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1)); |
70 | | - return 31 - bsr; |
71 | | - #else |
72 | | - #if defined(__arm__) && defined(__ARMCC_VERSION) |
73 | | - return __clz(inp); /* armcc builtin */ |
74 | | - #else |
75 | | - #if defined(__arm__) && defined(__GNUC__) |
76 | | - uint32_t lz; |
77 | | - __asm__("clz %0, %1": "=r"(lz):"r"(inp)); |
78 | | - return lz; |
79 | | - #else |
80 | | - /* slow default version */ |
81 | | - uint32_t summa = 24; |
82 | | - if (inp >= UINT32_C(0x10000)) |
83 | | - { |
84 | | - inp >>= 16; |
85 | | - summa -= 16; |
86 | | - } |
87 | | - if (inp >= UINT32_C(0x100)) |
88 | | - { |
89 | | - inp >>= 8; |
90 | | - summa -= 8; |
91 | | - } |
92 | | - return summa + clz_table[inp]; |
93 | | - #endif |
94 | | - #endif |
95 | | - #endif |
| 39 | + uint32_t count = 32; /* value returned when inp == 0, since the loop below then never runs */ |
| 40 | + while (inp) /* one iteration per bit position up to and including the highest set bit */ |
| 41 | + { |
| 42 | + inp >>= 1; |
| 43 | + count--; |
| 44 | + } |
| 45 | + return count; /* 32 minus the number of right shifts needed to clear inp */ |
96 | 46 | } |
97 | 47 |
|
98 | 48 | /* the five rounding modes that IEEE-754r defines */ |
|
0 commit comments