Skip to content

Commit 7015039

Browse files
authored
Use idiomatic CLZ implementation (#582)
Replaces the previous implementation of clz32 in the reference softfloat library, which had multiple code paths and relied on compiler-specific intrinsics or inline assembly for some architectures, with a simpler, idiomatic, pure C implementation. Modern compilers recognize this pattern and optimize it into a native CLZ instruction on most architectures. Tested on Clang 18 and GCC 16 for AArch64 and x86-64.
1 parent 27c6ab1 commit 7015039

File tree

1 file changed

+10
-60
lines changed

1 file changed

+10
-60
lines changed

Source/astcenc_mathlib_softfloat.cpp

Lines changed: 10 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// ----------------------------------------------------------------------------
3-
// Copyright 2011-2021 Arm Limited
3+
// Copyright 2011-2021,2025 Arm Limited
44
//
55
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
66
// use this file except in compliance with the License. You may obtain a copy
@@ -30,69 +30,19 @@ typedef uint16_t sf16;
3030
typedef uint32_t sf32;
3131

3232
/******************************************
33-
helper functions and their lookup tables
33+
helper functions
3434
******************************************/
35-
/* count leading zeros functions. Only used when the input is nonzero. */
36-
37-
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
38-
#elif defined(__arm__) && defined(__ARMCC_VERSION)
39-
#elif defined(__arm__) && defined(__GNUC__)
40-
#else
41-
/* table used for the slow default versions. */
42-
static const uint8_t clz_table[256] =
43-
{
44-
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
45-
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
46-
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
47-
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
49-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
50-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
58-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
60-
};
61-
#endif
6235

63-
/*
64-
32-bit count-leading-zeros function: use the Assembly instruction whenever possible. */
36+
/* Idiomatic count-leading-zeros; modern compilers compile this loop to a native instruction. Returns 32 when inp is zero. */
6537
static uint32_t clz32(uint32_t inp)
6638
{
67-
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
68-
uint32_t bsr;
69-
__asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1));
70-
return 31 - bsr;
71-
#else
72-
#if defined(__arm__) && defined(__ARMCC_VERSION)
73-
return __clz(inp); /* armcc builtin */
74-
#else
75-
#if defined(__arm__) && defined(__GNUC__)
76-
uint32_t lz;
77-
__asm__("clz %0, %1": "=r"(lz):"r"(inp));
78-
return lz;
79-
#else
80-
/* slow default version */
81-
uint32_t summa = 24;
82-
if (inp >= UINT32_C(0x10000))
83-
{
84-
inp >>= 16;
85-
summa -= 16;
86-
}
87-
if (inp >= UINT32_C(0x100))
88-
{
89-
inp >>= 8;
90-
summa -= 8;
91-
}
92-
return summa + clz_table[inp];
93-
#endif
94-
#endif
95-
#endif
39+
uint32_t count = 32;
40+
while (inp)
41+
{
42+
inp >>= 1;
43+
count--;
44+
}
45+
return count;
9646
}
9747

9848
/* the five rounding modes that IEEE-754r defines */

0 commit comments

Comments
 (0)