@@ -141,17 +141,27 @@ constexpr type_t half_hi(type_t value) noexcept;
141141template <typename type_t , std::enable_if_t <is_unsigned_v<type_t >, int > = 0 >
142142constexpr type_t half_make_hi (type_t value) noexcept ;
143143
144+ // propagate most significant bit to the right
145+
// Returns value with its most significant set bit copied into lower bit positions
// (for example 0b00010000 becomes 0b00011111); a zero value stays zero.
// byte_count controls how far the bit is propagated: the definition applies shifts of
// 1, 2 and 4 bits and then one further shift of 4 * n bits for n = 2, 4, ... byte_count.
// It defaults to byte_count_v<type_t>.
// Only available when is_unsigned_v<type_t> holds.
146+ template <typename type_t , uint_t byte_count = byte_count_v<type_t >, std::enable_if_t <is_unsigned_v<type_t >, int > = 0 >
147+ constexpr type_t pmsbr (type_t value) noexcept ;
148+
149+ // calculate number of set bits
150+
// Returns how many bits of value are set to 1.
// The count is produced by popcnt_impl and masked down to its meaningful low bits.
// Only available when is_unsigned_v<type_t> holds.
151+ template <typename type_t , std::enable_if_t <is_unsigned_v<type_t >, int > = 0 >
152+ constexpr uint_t popcnt (type_t value) noexcept ;
153+
144154// calculate leading zero bits
145155
// Returns the number of zero bits above the most significant set bit of value.
// Computed as bit_count_v<type_t> - popcnt(pmsbr(value)), so a zero value
// yields bit_count_v<type_t>.
// Only available when is_unsigned_v<type_t> holds.
146156template <typename type_t , std::enable_if_t <is_unsigned_v<type_t >, int > = 0 >
147157constexpr uint_t nlz (type_t value) noexcept ;
148158
149- // shift bits to left
159+ // shift bits to the left
150160
// NOTE(review): the definition is not visible in this hunk. Presumably value_hi and
// value_lo form one double-width operand that is shifted left by `shift` bits, with a
// single type_t-sized part returned - confirm against the definition, including the
// valid range of `shift`.
// Only available when is_unsigned_v<type_t> holds.
151161template <typename type_t , std::enable_if_t <is_unsigned_v<type_t >, int > = 0 >
152162constexpr type_t shl2 (type_t value_hi, type_t value_lo, uint_t shift) noexcept ;
153163
154- // shift bits to right
164+ // shift bits to the right
155165
// NOTE(review): the definition is not visible in this hunk. Presumably value_hi and
// value_lo form one double-width operand that is shifted right by `shift` bits, with a
// single type_t-sized part returned - confirm against the definition, including the
// valid range of `shift`.
// Only available when is_unsigned_v<type_t> holds.
156166template <typename type_t , std::enable_if_t <is_unsigned_v<type_t >, int > = 0 >
157167constexpr type_t shr2 (type_t value_hi, type_t value_lo, uint_t shift) noexcept ;
@@ -230,19 +240,87 @@ constexpr type_t half_make_hi(type_t value) noexcept
230240
231241
232242// //////////////////////////////////////////////////////////////////////////////////////////////////
233- template <typename type_t , std::enable_if_t <is_unsigned_v<type_t >, int >>
234- constexpr uint_t nlz (type_t value) noexcept
243+ template <typename type_t , uint_t byte_count, std::enable_if_t <is_unsigned_v<type_t >, int >>
244+ constexpr type_t pmsbr (type_t value) noexcept
235245{
236- uint_t result = 0 ;
237- type_t mask = type_t (1 ) << (bit_count_v<type_t > - 1 );
246+ if constexpr (byte_count == 1 ) {
238247
239- while ((~value & mask) != 0 ) {
248+ value |= (value >> 1 );
249+ value |= (value >> 2 );
250+ value |= (value >> 4 );
240251
241- mask >>= 1 ;
242- result++;
252+ } else {
253+
254+ value = pmsbr<type_t , byte_count / 2 >(value);
255+ value |= (value >> (4 * byte_count));
243256 }
244257
245- return result;
258+ return value;
259+ }
260+
261+
262+
263+ // //////////////////////////////////////////////////////////////////////////////////////////////////
264+ template <typename type_t , uint_t bit_count = bit_count_v<type_t >, uint_t mask_count = bit_count_v<type_t > / bit_count, std::enable_if_t <is_unsigned_v<type_t >, int > = 0 >
265+ constexpr type_t popcnt_msk () noexcept
266+ {
267+ if constexpr (mask_count == 1 ) {
268+
269+ return pmsbr<type_t >(type_t (1 ) << (bit_count / 2 - 1 ));
270+
271+ } else {
272+
273+ constexpr type_t mask = popcnt_msk<type_t , bit_count, mask_count - 1 >();
274+ return mask | (mask << (bit_count));
275+ }
276+ }
277+
278+
279+
280+ // //////////////////////////////////////////////////////////////////////////////////////////////////
281+ template <typename type_t , uint_t byte_count = byte_count_v<type_t >, std::enable_if_t <is_unsigned_v<type_t >, int > = 0 >
282+ constexpr type_t popcnt_impl (type_t value) noexcept
283+ {
284+ if constexpr (byte_count <= 4 ) {
285+
286+ constexpr type_t mask2 = popcnt_msk<type_t , 2 >();
287+ constexpr type_t mask4 = popcnt_msk<type_t , 4 >();
288+ constexpr type_t mask8 = popcnt_msk<type_t , 8 >();
289+
290+ value = value - ((value >> 1 ) & mask2);
291+ value = (value & mask4) + ((value >> 2 ) & mask4);
292+ value = (value + (value >> 4 )) & mask8;
293+
294+ if constexpr (byte_count >= 2 )
295+ value = value + (value >> 8 );
296+ if constexpr (byte_count >= 3 )
297+ value = value + (value >> 16 );
298+
299+ } else {
300+
301+ value = popcnt_impl<type_t , byte_count / 2 >(value);
302+ value = value + (value >> 4 * byte_count);
303+ }
304+
305+ return value;
306+ }
307+
308+
309+
310+ // //////////////////////////////////////////////////////////////////////////////////////////////////
311+ template <typename type_t , std::enable_if_t <is_unsigned_v<type_t >, int >>
312+ constexpr uint_t popcnt (type_t value) noexcept
313+ {
314+ return popcnt_impl<type_t >(value) & ((bit_count_v<type_t > << 2 ) - 1 );
315+ }
316+
317+
318+
319+ // //////////////////////////////////////////////////////////////////////////////////////////////////
320+ template <typename type_t , std::enable_if_t <is_unsigned_v<type_t >, int >>
321+ constexpr uint_t nlz (type_t value) noexcept
322+ {
323+ return bit_count_v<type_t > - popcnt (pmsbr (value));
246324}
247325
248326
@@ -470,7 +548,7 @@ constexpr type_t divr2(type_t value1_hi, type_t value1_lo, type_t value2, std::o
470548
471549 const type_t t1 = quotient_hi * nvalue2_lo;
472550 const type_t t2 = half_make_hi (*remainder_hi) | nvalue1_hi;
473-
551+
474552 if (t1 > t2) {
475553
476554 --quotient_hi;
@@ -483,7 +561,7 @@ constexpr type_t divr2(type_t value1_hi, type_t value1_lo, type_t value2, std::o
483561
484562 std::optional<type_t > remainder_lo = type_t ();
485563 type_t quotient_lo = divr (nvalue1_21, nvalue2_hi, remainder_lo);
486-
564+
487565 const type_t t3 = quotient_lo * nvalue2_lo;
488566 const type_t t4 = half_make_hi (*remainder_lo) | nvalue1_lo;
489567
0 commit comments