more tuning: print benchmark output with fmt, omit a zero exponent in to_chars, strip trailing whitespace

Daniel Lemire · Daniel Lemire · commit 2dd412ac0084 · 2025-04-18T17:52:01.000-04:00
diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp
@@ -23,6 +23,7 @@
 #include <string>
 #include <variant>
 #include <fast_float/fast_float.h>
+#include <fmt/core.h>
 
 using Benchmarks::arithmetic_float;
 using Benchmarks::BenchArgs;
@@ -47,12 +48,12 @@ void evaluateProperties(const std::vector<T> &lines,
 
   for (const auto &algo : args) {
     if (!algo.used) {
-      std::cout << "# skipping " << algo.name << std::endl;
+      fmt::println("# skipping {}", algo.name);
       continue;
     }
     // Apply filter if provided
     if (!is_matched(algo.name, filter)) {
-      std::cout << "# filtered out " << algo.name << std::endl;
+      fmt::println("# filtered out {}", algo.name);
       continue;
     }
     char buf1[100], buf2[100];
@@ -93,7 +94,7 @@ struct diy_float_t {
 template <arithmetic_float T>
 void process(const std::vector<T> &lines,
              const std::array<BenchArgs<T>, Benchmarks::COUNT> &args, const std::span<std::string> filter = {}) {
-  // We have a special algorithm for the reference:
+  // We have a special algorithm for the string generation:
   std::string just_string = "just_string";
   if (is_matched(just_string, filter)) {
     std::vector<diy_float_t> parsed;
@@ -110,17 +111,16 @@ void process(const std::vector<T> &lines,
       return volume;
     }, 100);
   } else {
-    std::cout << "# skipping " << just_string << std::endl;
-  
+    fmt::println("# skipping {}", just_string);
   }
   for (const auto &algo : args) {
     if (!algo.used) {
-      std::cout << "# skipping " << algo.name << std::endl;
+      fmt::println("# skipping {}", algo.name);
       continue;
     }
     // Apply filter if provided
     if (!is_matched(algo.name, filter)) {
-      std::cout << "# filtered out " << algo.name << std::endl;
+      fmt::println("# filtered out {}", algo.name);
       continue;
     }
     pretty_print(lines, algo.name, [&algo](const std::vector<T> &lines) -> int {
@@ -139,7 +139,7 @@ template <typename T>
 std::vector<T> fileload(const std::string &filename) {
   std::ifstream inputfile(filename);
   if (!inputfile) {
-    std::cerr << "can't open " << filename << std::endl;
+    fmt::print(stderr, "can't open {}\n", filename);
     return {};
   }
 
@@ -150,24 +150,21 @@ std::vector<T> fileload(const std::string &filename) {
       lines.push_back(std::is_same_v<T, float> ? std::stof(line)
                                                : std::stod(line));
     } catch (...) {
-      std::cerr << "problem with " << line << "\n"
-                << "We expect floating-point numbers (one per line)."
-                << std::endl;
+      fmt::print(stderr, "problem with {}\nWe expect floating-point numbers (one per line).\n", line);
       std::abort();
     }
   }
-  std::cout << "# read " << lines.size() << " lines " << std::endl;
+  fmt::println("# read {} lines", lines.size());
   return lines;
 }
 
 template <typename T>
 std::vector<T> get_random_numbers(size_t howmany,
                                   const std::string &random_model) {
-  std::cout << "# parsing random numbers" << std::endl;
+  fmt::println("# parsing random numbers");
   std::vector<T> lines;
   auto g = get_generator_by_name<T>(random_model);
-  std::cout << "model: " << g->describe() << "\n"
-            << "volume: " << howmany << " floats" << std::endl;
+  fmt::print("model: {}\nvolume: {} floats\n", g->describe(), howmany);
   lines.reserve(howmany); // let us reserve plenty of memory.
   for (size_t i = 0; i < howmany; i++) {
     const T line = g->new_float();
@@ -203,14 +200,13 @@ int main(int argc, char **argv) {
     const auto result = options.parse(argc, argv);
 
     if (result["help"].as<bool>()) {
-      std::cout << options.help() << std::endl;
+      fmt::print("{}\n", options.help());
       return EXIT_SUCCESS;
     }
     const size_t repeat = result["repeat"].as<size_t>();
     const bool single = result["single"].as<bool>();
     std::vector<std::string> filter = result["algo-filter"].as<std::vector<std::string>>();
-    std::cout << "number type: binary"
-              << (single ? "32 (float)" : "64 (double)") << std::endl;
+    fmt::println("number type: binary{}", (single ? "32 (float)" : "64 (double)"));
 
     std::variant<std::vector<float>, std::vector<double>> numbers;
     const auto filename = result["file"].as<std::string>();
@@ -221,9 +217,7 @@ int main(int argc, char **argv) {
         numbers = get_random_numbers<float>(volume, model);
       else
         numbers = get_random_numbers<double>(volume, model);
-      std::cout << "# You can also provide a filename (with the -f flag): "
-                   "it should contain one string per line corresponding to a number"
-                << std::endl;
+      fmt::println("# You can also provide a filename (with the -f flag): it should contain one string per line corresponding to a number");
     }
     else {
       if (single)
@@ -241,7 +235,7 @@ int main(int argc, char **argv) {
       algorithms = Benchmarks::initArgs<double>(errol);
 
     if(repeat > 0) {
-      std::cout << "# forcing repeat count to " << repeat << std::endl;
+      fmt::println("# forcing repeat count to {}", repeat);
       std::visit([repeat](auto &args) {
         for (auto &arg : args)
           arg.testRepeat = repeat;
@@ -260,7 +254,20 @@ int main(int argc, char **argv) {
       }
     }, numbers, algorithms);
   } catch (const std::exception &e) {
-    std::cout << "error parsing options: " << e.what() << std::endl;
+    fmt::println("Error parsing options: {}", e.what());
+    fmt::println("\nUSAGE GUIDE:");
+    fmt::println("  ./benchmark [OPTIONS]");
+    fmt::println("\nCOMMAND SUMMARY:");
+    fmt::println("  The benchmark tool evaluates the performance of different floating-point to string");
+    fmt::println("  conversion algorithms. It can use either synthetic data or a file containing");
+    fmt::println("  floating-point numbers (one per line).");
+    fmt::println("\nEXAMPLES:");
+    fmt::println("  ./benchmark --single                    # Run benchmark with single precision (float)");
+    fmt::println("  ./benchmark --file=data/canada.txt      # Run benchmark using numbers from a file");
+    fmt::println("  ./benchmark --test                      # Test correctness instead of performance");
+    fmt::println("  ./benchmark --volume=1000 --model=uniform # Generate 1000 uniform random numbers");
+    fmt::println("  ./benchmark --algo-filter=ryu,grisu     # Only test algorithms containing 'ryu' or 'grisu'");
+    fmt::println("\nFor full options list, run: ./benchmark --help");
     return EXIT_FAILURE;
   }
 }
diff --git a/benchmarks/exhaustivefloat32.cpp b/benchmarks/exhaustivefloat32.cpp
@@ -48,14 +48,14 @@ std::optional<float> parse_float(std::string_view sv) {
   float result;
   const char* begin = sv.data();
   const char* end = sv.data() + sv.size();
-  
+
   auto [ptr, ec] = std::from_chars(begin, end, result);
-  
+
   // Check if parsing succeeded and consumed the entire string
   if (ec == std::errc{} && ptr == end) {
       return result;
   }
-  
+
   // Return nullopt if parsing failed or didn't consume all input
   return std::nullopt;
 }
@@ -76,7 +76,7 @@ void run_exhaustive32(bool errol, const std::vector<std::string>& algo_filter =
       fmt::print("# skipping {} because it is the reference.\n", algo.name);
       continue;
     }
-    
+
     // Apply filter if provided
     if (!algo_filter.empty()) {
       bool matched = false;
@@ -91,7 +91,7 @@ void run_exhaustive32(bool errol, const std::vector<std::string>& algo_filter =
         continue;
       }
     }
-    
+
     bool incorrect = false;
     char buf1[100], buf2[100];
     std::span<char> bufRef(buf1, sizeof(buf1)), bufAlgo(buf2, sizeof(buf2));
diff --git a/benchmarks/ieeeToString.cpp b/benchmarks/ieeeToString.cpp
@@ -57,7 +57,7 @@ IEEE754d decode_ieee754(double f) {
 }
 
 ////////////////////////
-// We should use https://en.cppreference.com/w/cpp/numeric/countl_zero 
+// We should use https://en.cppreference.com/w/cpp/numeric/countl_zero
 ////////////////////////
 #if WE_HAVE_VISUAL_STUDIO
 inline int leading_zeroes_64(uint64_t input_num) {
@@ -84,7 +84,7 @@ inline int leading_zeroes_64(uint64_t input_num) {
 inline int int_log2_64(uint64_t x) { return 63 - leading_zeroes_64(x | 1); }
 
 /**
- * Reference:  
+ * Reference:
  * Daniel Lemire, "Computing the number of digits of an integer even faster," in Daniel Lemire's blog, June 3, 2021, https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster/.
  */
 inline int fast_digit_count32(uint32_t x) {
@@ -101,7 +101,7 @@ inline int fast_digit_count32(uint32_t x) {
 
 
 /**
- * Reference:  
+ * Reference:
  * Daniel Lemire, "Counting the digits of 64-bit integers," in Daniel Lemire's blog, January 7, 2025, https://lemire.me/blog/2025/01/07/counting-the-digits-of-64-bit-integers/.
  */
 inline int fast_digit_count64(uint64_t x) {
@@ -133,12 +133,13 @@ inline int fast_digit_count64(uint64_t x) {
 template <typename T>
 int to_chars(T mantissa, int32_t exponent, bool sign, char* const result) {
   constexpr bool is_double = sizeof(T) == 8;
+  static_assert(is_double || sizeof(T) == 4, "Unsupported type size");
 
   int index = 0;
   if (sign)
     result[index++] = '-';
   // We use fast arithmetic to compute the number of digits.
-  const uint32_t olength = is_double ? fast_digit_count64(mantissa) 
+  const uint32_t olength = is_double ? fast_digit_count64(mantissa)
                                      : fast_digit_count32(mantissa);
   // Print the decimal digits.
   // for (uint32_t i = 0; i < olength - 1; ++i) {
@@ -210,30 +211,32 @@ int to_chars(T mantissa, int32_t exponent, bool sign, char* const result) {
   }
 
   // Print the exponent.
-  result[index++] = 'E';
   int32_t exp = exponent + (int32_t) olength - 1;
-  if (exp < 0) {
-    result[index++] = '-';
-    exp = -exp;
-  }
-
-  const auto handle_common_cases = [&]() {
-    if (exp >= 10) {
-      memcpy(result + index, hundreds_digit_table + 2 * exp, 2);
-      index += 2;
-    } else
-      result[index++] = (char)('0' + exp);
-  };
-  if constexpr (is_double) {
-    if (exp >= 100) {
-      const int32_t c = exp % 10;
-      memcpy(result + index, hundreds_digit_table + 2 * (exp / 10), 2);
-      result[index + 2] = (char) ('0' + c);
-      index += 3;
+  if(mantissa && exp) { // We do not print the exponent if the mantissa is zero or the exponent is zero.
+    result[index++] = 'E';
+    if (exp < 0) {
+      result[index++] = '-';
+      exp = -exp;
+    }
+
+    const auto handle_common_cases = [&]() {
+      if (exp >= 10) {
+        memcpy(result + index, hundreds_digit_table + 2 * exp, 2);
+        index += 2;
+      } else
+        result[index++] = (char)('0' + exp);
+    };
+    if constexpr (is_double) {
+      if (exp >= 100) {
+        const int32_t c = exp % 10;
+        memcpy(result + index, hundreds_digit_table + 2 * (exp / 10), 2);
+        result[index + 2] = (char) ('0' + c);
+        index += 3;
+      } else
+        handle_common_cases();
     } else
       handle_common_cases();
-  } else
-    handle_common_cases();
+  }
 
   return index;
 }
diff --git a/benchmarks/thoroughfloat64.cpp b/benchmarks/thoroughfloat64.cpp
@@ -50,14 +50,14 @@ std::optional<double> parse_double(std::string_view sv) {
   double result;
   const char* begin = sv.data();
   const char* end = sv.data() + sv.size();
-  
+
   auto [ptr, ec] = std::from_chars(begin, end, result);
-  
+
   // Check if parsing succeeded and consumed the entire string
   if (ec == std::errc{} && ptr == end) {
       return result;
   }
-  
+
   // Return nullopt if parsing failed or didn't consume all input
   return std::nullopt;
 }
@@ -72,7 +72,7 @@ std::vector<test_case> load_doubles_from_file(const std::string& filename) {
   std::vector<test_case> numbers;
   std::ifstream file(filename);
   std::string line;
-  
+
   if (!file.is_open()) {
     fmt::print("Error: Could not open file {}\n", filename);
     return numbers;
@@ -85,7 +85,7 @@ std::vector<test_case> load_doubles_from_file(const std::string& filename) {
       fmt::print("Warning: Could not parse '{}' as double, skipping\n", line);
     }
   }
-  
+
   file.close();
   return numbers;
 }
@@ -113,7 +113,7 @@ void run_file_test(const std::string& filename, bool errol, const std::vector<st
       fmt::print("# skipping {} because it is the reference.\n", algo.name);
       continue;
     }
-    
+
     // Apply filter if provided
     if (!algo_filter.empty()) {
       bool matched = false;
@@ -128,13 +128,13 @@ void run_file_test(const std::string& filename, bool errol, const std::vector<st
         continue;
       }
     }
-    
+
     bool incorrect = false;
     char buf1[100], buf2[100];
     std::span<char> bufRef(buf1, sizeof(buf1)), bufAlgo(buf2, sizeof(buf2));
     fmt::print("# processing {}", algo.name);
     fflush(stdout);
-    
+
     size_t total = test_values.size();
     for (size_t i = 0; i < total; ++i) {
       if (i % (total/10) == 0 && total > 10) {
@@ -145,7 +145,7 @@ void run_file_test(const std::string& filename, bool errol, const std::vector<st
       const std::string& str_value = test_values[i].str_value;
       if (std::isnan(d) || std::isinf(d))
         continue;
-      
+
       const size_t vRef = Benchmarks::dragonbox(d, bufRef);
       const size_t vAlgo = algo.func(d, bufAlgo);
 
@@ -157,7 +157,7 @@ void run_file_test(const std::string& filename, bool errol, const std::vector<st
       auto countAlgo = count_significant_digits(svAlgo);
       auto backRef = parse_double(svRef);
       auto backAlgo = parse_double(svAlgo);
-      
+
       if(!backRef || !backAlgo) {
         incorrect = true;
         fmt::print(" parse error: case: {}; d = {}, bufRef = {}, bufAlgo = {}", str_value, double_to_hex(d),