Merge pull request #28 from fastfloat/exhaustive32_count_fix

lemire · web-flow · commit a213fa32c5cc · 2025-04-08T13:45:27.000-04:00
count_significant_digits stop counting trailing zeroes
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.29)
+cmake_minimum_required(VERSION 3.28)
 
 project(SimpleFastFloatBenchmark VERSION 0.1.0 LANGUAGES CXX C)
 set(CMAKE_CXX_STANDARD 20)
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
@@ -89,10 +89,10 @@ target_include_directories(benchmark_deps INTERFACE ${grisu-exact_SOURCE_DIR})
 
 target_link_libraries(benchmark PUBLIC benchmark_deps)
 
-if(TO_CHARS_OK)
+if(TO_CHARS_OK AND FROM_CHARS_OK)
     add_executable(exhaustivefloat32
         exhaustivefloat32.cpp
     )
     target_link_libraries(exhaustivefloat32 PUBLIC benchmark_deps)
-endif(TO_CHARS_OK)
+endif(TO_CHARS_OK AND FROM_CHARS_OK)
 
diff --git a/benchmarks/algorithms.h b/benchmarks/algorithms.h
@@ -19,6 +19,7 @@
 
 #include <fmt/format.h>
 
+#include <array>
 #include <span>
 
 #include "cpp/common/traits.hpp"  // Teju Jagua
@@ -310,7 +311,6 @@ std::array<BenchArgs<T>, Benchmarks::COUNT> initArgs(bool errol = false) {
   return args;
 };
 
-
 }  // namespace Benchmarks
 
 #endif
diff --git a/benchmarks/exhaustivefloat32.cpp b/benchmarks/exhaustivefloat32.cpp
@@ -1,74 +1,64 @@
+#include <fmt/format.h>
 
-#include "algorithms.h"
-#include "cxxopts.hpp"
 #include <array>
 #include <bit>
 #include <cctype>
 #include <cmath>
 #include <cstring>
-#include <fmt/format.h>
 #include <iostream>
 #include <string_view>
+#include <charconv>
+
+#include "algorithms.h"
+#include "cxxopts.hpp"
 
 size_t count_significant_digits(std::string_view num_str) {
   size_t count = 0;
-  bool has_decimal = false;
-  bool in_exponent = false;
+  size_t trailing_zeros = 0;
   bool leading_zero = true;
 
   for (char c : num_str) {
-    if (c == '.') {
-      has_decimal = true;
+    if (c == '.')
       continue;
-    }
-    if (c == 'e' || c == 'E') {
-      in_exponent = true;
-      continue;
-    }
+    if (c == 'e' || c == 'E')
+      break; // Stop counting at exponent
     if (std::isdigit(static_cast<unsigned char>(c))) {
-      if (!in_exponent) {
-        if (leading_zero && c == '0') {
-          // Skip leading zeros before decimal
-          continue;
-        }
-        leading_zero = false;
-        count++;
+      if (c == '0') {
+        if (!leading_zero)
+          trailing_zeros++;
+        continue;
       }
-    }
-  }
-
-  // Special case: "X.0" should count as 1 digit
-  if (has_decimal && count > 1) {
-    auto last_digit_pos = num_str.find_last_not_of("0eE+-");
-    if (last_digit_pos != std::string_view::npos &&
-        num_str[last_digit_pos] == '.' && count == 2) {
-      return 1;
+      leading_zero = false;
+      count += trailing_zeros + 1;
+      trailing_zeros = 0;
     }
   }
 
   return count;
 }
 
 std::string float_to_hex(float f) {
-    if (std::isnan(f) || std::isinf(f)) {
-        return fmt::format("{}", f); // Handle special cases
-    }
-
-    uint32_t bits = std::bit_cast<uint32_t>(f);
-    int exponent;
-    float mantissa = std::frexp(f, &exponent); // Get mantissa and exponent
-    uint32_t mantissa_bits = bits & 0x7FFFFF;  // 23-bit mantissa
-    int exp_bits = (bits >> 23) & 0xFF;        // 8-bit exponent
-    bool sign = bits >> 31;                    // Sign bit
-
-    // Adjust for IEEE 754 representation
-    if (exp_bits == 0 && mantissa_bits == 0) {
-        return "0x0p+0"; // Zero case
-    }
+  std::ostringstream oss;
+  oss << std::hexfloat << f;
+  return oss.str();
+}
 
-    // Convert to hex format
-    return fmt::format("0x1.{:06x}p{:+d}", mantissa_bits, exponent - 23);
+std::optional<float> parse_float(std::string_view sv) {
+  float result;
+  const char* begin = sv.data();
+  const char* end = sv.data() + sv.size();
+  
+  auto [ptr, ec] = std::from_chars(begin, end, result);
+  
+  // Check if parsing succeeded and consumed the entire string
+  if (ec == std::errc{} && ptr == end) {
+      return result;
+  }
+  
+  // Return nullopt if parsing failed or didn't consume all input
+  return std::nullopt;
 }
+
 void run_exhaustive32(bool errol) {
   constexpr auto precision = std::numeric_limits<float>::digits10;
   fmt::println("{:20} {:20}", "Algorithm", "Valid shortest serialization");
@@ -78,7 +68,11 @@ void run_exhaustive32(bool errol) {
 
   for (const auto &algo : args) {
     if (!algo.used) {
-      std::cout << "# skipping " << algo.name << std::endl;
+      fmt::print("# skipping {}\n", algo.name);
+      continue;
+    }
+    if (algo.func == Benchmarks::dragonbox<float>) {
+      fmt::print("# skipping {} because it is the reference.\n", algo.name);
       continue;
     }
     bool incorrect = false;
@@ -97,15 +91,42 @@ void run_exhaustive32(bool errol) {
       std::memcpy(&d, &i32, sizeof(float));
       if (std::isnan(d) || std::isinf(d))
         continue;
-      // Reference output
-      const size_t vRef = Benchmarks::std_to_chars(d, bufRef);
+      // Reference output, we cannot use std::to_chars here, because it produces
+      // the shortest representation, which is not necessarily the same as the
+      // representation using the fewest significant digits.
+      // So we use dragonbox, which serves as the reference implementation.
+      const size_t vRef = Benchmarks::dragonbox(d, bufRef);
       const size_t vAlgo = algo.func(d, bufAlgo);
 
       std::string_view svRef{bufRef.data(), vRef};
       std::string_view svAlgo{bufAlgo.data(), vAlgo};
 
       auto countRef = count_significant_digits(svRef);
       auto countAlgo = count_significant_digits(svAlgo);
+      auto backRef = parse_float(svRef);
+      auto backAlgo = parse_float(svAlgo);
+      if(!backRef || !backAlgo) {
+        incorrect = true;
+        fmt::print(" parse error: d = {}, bufRef = {}, bufAlgo = {}", float_to_hex(d),
+                   svRef, svAlgo);
+        fflush(stdout);
+        break;
+      }
+      if(*backRef != d || *backAlgo != d) {
+        fmt::println("\n# Error: parsing the output with std::from_chars does not bring back the input.");
+      }
+      if(*backRef != d) {
+        incorrect = true;
+        fmt::print(" ref mismatch: d = {}, backRef = {}", d, *backRef);
+        fflush(stdout);
+        break;
+      }
+      if(*backAlgo != d) {
+        incorrect = true;
+        fmt::print(" algo mismatch: d = {}, backAlgo = {}, parsing the output with std::from_chars does not recover the original", d, *backAlgo);
+        fflush(stdout);
+        break;
+      }
       if (countRef != countAlgo) {
         incorrect = true;
         fmt::print(" mismatch: d = {}, bufRef = {}, bufAlgo = {}", float_to_hex(d),
@@ -133,12 +154,12 @@ int main(int argc, char **argv) {
     const auto result = options.parse(argc, argv);
 
     if (result["help"].as<bool>()) {
-      std::cout << options.help() << std::endl;
+      fmt::print("{}\n", options.help());
       return EXIT_SUCCESS;
     }
     run_exhaustive32(result["errol"].as<bool>());
   } catch (const std::exception &e) {
-    std::cout << "error parsing options: " << e.what() << std::endl;
+    fmt::print("error parsing options: {}\n", e.what());
     return EXIT_FAILURE;
   }
 }

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-cmake_minimum_required(VERSION 3.29)`
	`1`	`+cmake_minimum_required(VERSION 3.28)`
`2`	`2`
`3`	`3`	`project(SimpleFastFloatBenchmark VERSION 0.1.0 LANGUAGES CXX C)`
`4`	`4`	`set(CMAKE_CXX_STANDARD 20)`