Merge branch 'develop' into restore_promote

ckormanyos · ckormanyos · commit 5ac823ad7eb3 · 2024-05-07T07:45:52.000+02:00
diff --git a/include/boost/decimal/detail/remove_trailing_zeros.hpp b/include/boost/decimal/detail/remove_trailing_zeros.hpp
@@ -40,8 +40,13 @@ constexpr auto remove_trailing_zeros(std::uint32_t n) noexcept -> remove_trailin
 {
     std::size_t s {};
 
-    auto r = rotr<32>(n * UINT32_C(184254097), 4);
-    auto b = r < UINT32_C(429497);
+    auto r = rotr<32>(n * UINT32_C(15273505), 8);
+    auto b = r < UINT32_C(43);
+    s = s * 2U + static_cast<std::size_t>(b);
+    n = b ? r : n;
+
+    r = rotr<32>(n * UINT32_C(184254097), 4);
+    b = r < UINT32_C(429497);
     s = s * 2U + static_cast<std::size_t>(b);
     n = b ? r : n;
 
@@ -62,8 +67,13 @@ constexpr auto remove_trailing_zeros(std::uint64_t n) noexcept -> remove_trailin
 {
     std::size_t s {};
 
-    auto r = rotr<64>(n * UINT64_C(28999941890838049), 8);
-    auto b = r < UINT64_C(184467440738);
+    auto r = rotr<64>(n * UINT64_C(230079197716545), 16);
+    auto b = r < UINT64_C(1845);
+    s = s * 2U + static_cast<std::size_t>(b);
+    n = b ? r : n;
+
+    r = rotr<64>(n * UINT64_C(28999941890838049), 8);
+    b = r < UINT64_C(184467440738);
     s = s * 2U + static_cast<std::size_t>(b);
     n = b ? r : n;
 
@@ -85,22 +95,39 @@ constexpr auto remove_trailing_zeros(std::uint64_t n) noexcept -> remove_trailin
     return {n, s};
 }
 
-// TODO(mborland): Make this better for the 2-word case
+// Removes trailing decimal zeros from a 128-bit value.
+//
+// Returns {trimmed number, number of removed zeros}.
+//
+// The work is done by a fixed sequence of six steps that try to strip
+// 32, 16, 8, 4, 2 and finally 1 zero(s). Each step multiplies n by a constant,
+// rotates the product right by the step width and compares the result with a
+// threshold. When the comparison succeeds the rotated value replaces n and a
+// 1 bit is shifted into s; otherwise n is kept and a 0 bit is shifted in.
+// After the last step s therefore holds the total count in binary.
+//
+// NOTE(review): the multipliers and thresholds look like the values "M" and
+// "Threshold Value" printed by tools/granlund-montgomery.py for bits = 128 and
+// t = step width -- confirm against the tool before changing any of them.
+// NOTE(review): for n == 0 every comparison appears to succeed, which would
+// yield {0, 63} -- confirm that callers never pass zero or accept that result.
 constexpr auto remove_trailing_zeros(uint128 n) noexcept -> remove_trailing_zeros_return<uint128>
 {
-    if (n.high == UINT64_C(0))
-    {
-        const auto temp {remove_trailing_zeros(n.low)};
-        return {static_cast<uint128>(temp.trimmed_number), temp.number_of_removed_zeros};
-    }
-
     std::size_t s {};
 
-    while (n % 10 == 0)
-    {
-        n /= 10;
-        ++s;
-    }
+    // Step width 32: contributes 32 to s when accepted.
+    auto r = rotr<128>(n * uint128(UINT64_C(0x62B42691AD836EB1), UINT64_C(0x16590F420A835081)), 32);
+    auto b = r < uint128 {UINT64_C(0x0), UINT64_C(0x33EC48)};
+    s = s * 2U + static_cast<std::size_t>(b);
+    n = b ? r : n;
+
+    // Step width 16.
+    r = rotr<128>(n * uint128 {UINT64_C(0x3275305C1066), UINT64_C(0xE4A4D1417CD9A041)}, 16);
+    b = r < uint128 {UINT64_C(0x734), UINT64_C(0xACA5F6226F0ADA62)};
+    s = s * 2U + static_cast<std::size_t>(b);
+    n = b ? r : n;
+
+    // Step width 8.
+    r = rotr<128>(n * uint128 {UINT64_C(0x6B7213EE9F5A78), UINT64_C(0xC767074B22E90E21)}, 8);
+    b = r < uint128 {UINT64_C(0x2AF31DC461), UINT64_C(0x1873BF3F70834ACE)};
+    s = s * 2U + static_cast<std::size_t>(b);
+    n = b ? r : n;
+
+    // Step width 4.
+    r = rotr<128>(n * uint128 {UINT64_C(0x95182A9930BE0DE), UINT64_C(0xD288CE703AFB7E91)}, 4);
+    b = r < uint128 {UINT64_C(0x68DB8BAC710CB), UINT64_C(0x295E9E1B089A0276)};
+    s = s * 2U + static_cast<std::size_t>(b);
+    n = b ? r : n;
+
+    // Step width 2.
+    r = rotr<128>(n * uint128 {UINT64_C(0x28F5C28F5C28F5C2), UINT64_C(0x8F5C28F5C28F5C29)}, 2);
+    b = r < uint128 {UINT64_C(0x28F5C28F5C28F5C), UINT64_C(0x28F5C28F5C28F5C3)};
+    s = s * 2U + static_cast<std::size_t>(b);
+    n = b ? r : n;
+
+    // Step width 1: the multiplier times 5 is 1 modulo 2^128.
+    r = rotr<128>(n * uint128 {UINT64_C(0xCCCCCCCCCCCCCCCC), UINT64_C(0xCCCCCCCCCCCCCCCD)}, 1);
+    b = r < uint128 {UINT64_C(0x1999999999999999), UINT64_C(0x999999999999999A)};
+    s = s * 2U + static_cast<std::size_t>(b);
+    n = b ? r : n;
 
     return {n, s};
 }
diff --git a/test/Jamfile b/test/Jamfile
@@ -110,6 +110,7 @@ run test_log1p.cpp ;
 run test_pow.cpp ;
 run test_promotion.cpp ;
 run test_remainder_remquo.cpp ;
+run test_remove_trailing_zeros.cpp ;
 run test_sin_cos.cpp ;
 run test_sinh.cpp ;
 run test_snprintf.cpp ;
diff --git a/test/test_remove_trailing_zeros.cpp b/test/test_remove_trailing_zeros.cpp
@@ -0,0 +1,87 @@
+// Copyright 2024 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#include <boost/decimal.hpp>
+#include <boost/core/lightweight_test.hpp>
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <limits>
+
+// Checks remove_trailing_zeros for every power of ten in the table that is
+// strictly below std::numeric_limits<T>::max(): the trimmed number has to be 1
+// and the number of removed zeros has to equal the exponent.
+template <typename T>
+void test()
+{
+    // powers_of_10[i] == 10^i, so the index doubles as the expected zero count.
+    constexpr std::array<std::uint64_t, 20> powers_of_10 =
+    {{
+             UINT64_C(1), UINT64_C(10), UINT64_C(100), UINT64_C(1000), UINT64_C(10000), UINT64_C(100000), UINT64_C(1000000),
+             UINT64_C(10000000), UINT64_C(100000000), UINT64_C(1000000000), UINT64_C(10000000000), UINT64_C(100000000000),
+             UINT64_C(1000000000000), UINT64_C(10000000000000), UINT64_C(100000000000000), UINT64_C(1000000000000000),
+             UINT64_C(10000000000000000), UINT64_C(100000000000000000), UINT64_C(1000000000000000000), UINT64_C(10000000000000000000)
+     }};
+
+    for (std::size_t exponent {}; exponent < powers_of_10.size(); ++exponent)
+    {
+        const auto val {powers_of_10[exponent]};
+
+        // Skip the values that cannot be represented by the type under test.
+        if (val < std::numeric_limits<T>::max())
+        {
+            const auto temp {boost::decimal::detail::remove_trailing_zeros(static_cast<T>(val))};
+
+            // Evaluate both checks unconditionally so that each failure is recorded.
+            const bool trimmed_ok = BOOST_TEST_EQ(temp.trimmed_number, T(1));
+            const bool count_ok = BOOST_TEST_EQ(temp.number_of_removed_zeros, exponent);
+
+            if (!(trimmed_ok && count_ok))
+            {
+                // LCOV_EXCL_START
+                std::cerr << "Input Number: " << val
+                          << "\nOutput Number: " << temp.trimmed_number
+                          << "\nZeros removed: " << temp.number_of_removed_zeros << std::endl;
+                // LCOV_EXCL_STOP
+            }
+        }
+    }
+}
+
+// Checks remove_trailing_zeros(uint128) for the powers of ten that need more
+// than 64 bits (10^20 through 10^38): the trimmed number has to be 1 and the
+// number of removed zeros has to equal the exponent.
+void test_extended()
+{
+    using namespace boost::decimal;
+
+    // Exponent of the first table entry; entry i holds 10^(first_exponent + i).
+    constexpr std::size_t first_exponent {20U};
+
+    // Each entry is written as {high word, low word}.
+    constexpr std::array<detail::uint128, 19> powers_of_10 =
+    {{
+        detail::uint128 {UINT64_C(0x5), UINT64_C(0x6BC75E2D63100000)},
+        detail::uint128 {UINT64_C(0x36), UINT64_C(0x35C9ADC5DEA00000)},
+        detail::uint128 {UINT64_C(0x21E), UINT64_C(0x19E0C9BAB2400000)},
+        detail::uint128 {UINT64_C(0x152D), UINT64_C(0x02C7E14AF6800000)},
+        detail::uint128 {UINT64_C(0xD3C2), UINT64_C(0x1BCECCEDA1000000)},
+        detail::uint128 {UINT64_C(0x84595), UINT64_C(0x161401484A000000)},
+        detail::uint128 {UINT64_C(0x52B7D2), UINT64_C(0xDCC80CD2E4000000)},
+        detail::uint128 {UINT64_C(0x33B2E3C), UINT64_C(0x9FD0803CE8000000)},
+        detail::uint128 {UINT64_C(0x204FCE5E), UINT64_C(0x3E25026110000000)},
+        detail::uint128 {UINT64_C(0x1431E0FAE), UINT64_C(0x6D7217CAA0000000)},
+        detail::uint128 {UINT64_C(0xC9F2C9CD0), UINT64_C(0x4674EDEA40000000)},
+        detail::uint128 {UINT64_C(0x7E37BE2022), UINT64_C(0xC0914B2680000000)},
+        detail::uint128 {UINT64_C(0x4EE2D6D415B), UINT64_C(0x85ACEF8100000000)},
+        detail::uint128 {UINT64_C(0x314DC6448D93), UINT64_C(0x38C15B0A00000000)},
+        detail::uint128 {UINT64_C(0x1ED09BEAD87C0), UINT64_C(0x378D8E6400000000)},
+        detail::uint128 {UINT64_C(0x13426172C74D82), UINT64_C(0x2B878FE800000000)},
+        detail::uint128 {UINT64_C(0xC097CE7BC90715), UINT64_C(0xB34B9F1000000000)},
+        detail::uint128 {UINT64_C(0x785EE10D5DA46D9), UINT64_C(0x00F436A000000000)},
+        detail::uint128 {UINT64_C(0x4B3B4CA85A86C47A), UINT64_C(0x098A224000000000)}
+     }};
+
+    for (std::size_t index {}; index < powers_of_10.size(); ++index)
+    {
+        const auto& val {powers_of_10[index]};
+        const auto temp {boost::decimal::detail::remove_trailing_zeros(val)};
+
+        // Evaluate both checks unconditionally so that each failure is recorded.
+        const bool trimmed_ok = BOOST_TEST_EQ(temp.trimmed_number, detail::uint128(1));
+        const bool count_ok = BOOST_TEST_EQ(temp.number_of_removed_zeros, first_exponent + index);
+
+        if (!(trimmed_ok && count_ok))
+        {
+            // LCOV_EXCL_START
+            std::cerr << "Input Number: " << val
+                      << "\nOutput Number: " << temp.trimmed_number
+                      << "\nZeros removed: " << temp.number_of_removed_zeros << std::endl;
+            // LCOV_EXCL_STOP
+        }
+    }
+}
+
+// Test driver: runs the 64-bit power-of-ten table against each supported
+// integer width, then the uint128-only table, and returns the value reported
+// by boost::report_errors().
+int main()
+{
+    using wide_uint = boost::decimal::detail::uint128;
+
+    test<std::uint32_t>();
+    test<std::uint64_t>();
+    test<wide_uint>();
+
+    test_extended();
+
+    const int failed_checks {boost::report_errors()};
+    return failed_checks;
+}
diff --git a/tools/granlund-montgomery.py b/tools/granlund-montgomery.py
@@ -0,0 +1,39 @@
+def extended_euclidean(a, b):
+    """Return (g, x, y) such that g = gcd(a, b) and a*x + b*y == g."""
+    # Base case: gcd(0, b) is b, obtained as 0*0 + b*1.
+    if a == 0:
+        return b, 0, 1
+    quotient, remainder = divmod(b, a)
+    # Solve the smaller problem (b mod a, a) first, then back-substitute.
+    gcd, coeff_rem, coeff_a = extended_euclidean(remainder, a)
+    return gcd, coeff_a - quotient * coeff_rem, coeff_rem
+
+def mod_inverse(a, m):
+    """Return the inverse of a modulo m, or 0 when gcd(a, m) != 1."""
+    gcd, coeff, _ = extended_euclidean(a, m)
+    # The coefficient of a is an inverse only when a and m are coprime;
+    # 0 is the "modular inverse doesn't exist" sentinel.
+    return coeff % m if gcd == 1 else 0
+
+# Constants
+# bits: width of the integer type; t: rotation amount, q = 10**t is the divisor.
+bits = 128
+t = 32
+
+# Every division below is a floor division on purpose. The operands are far
+# larger than 2**53, so true division ("/") would round them through a float
+# and silently corrupt the printed constants.
+q = 10**t
+q0 = q // 2**t  # q with its factor 2**t removed
+print("Q0: ", q0)
+twobt_min_t = 2**(bits - t)
+
+# Calculate the modular inverse
+m0 = mod_inverse(q0, twobt_min_t)
+print("M0: ", m0)
+
+p0 = (q0 * m0 - 1) // twobt_min_t
+print("P0: ", p0)
+
+p = q0 + p0
+print("P: ", p)
+
+m = (twobt_min_t * p + 1) // q0
+print("M: ", m)
+
+threshold_value = 2**bits // q + 1
+print("Threshold Value: ", threshold_value)