Merge pull request #349 from shikharish/uint8

lemire · web-flow · commit 1ad224e42c0b · 2025-12-24T18:37:11.000-05:00
uint8_t parsing
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
@@ -509,6 +509,92 @@ parse_int_string(UC const *p, UC const *pend, T &value,
 
   UC const *const start_digits = p;
 
+  // Fast path for base-10 std::uint8_t read from single-byte characters: up
+  // to four characters are packed into one 32-bit word, then validated and
+  // converted with word-wide arithmetic. Any other base, or a character type
+  // wider than one byte, skips this block and is handled by the general
+  // parsing code that follows it, because the packing below assumes decimal
+  // digits stored one per byte.
+  FASTFLOAT_IF_CONSTEXPR17(
+      (std::is_same<T, std::uint8_t>::value && sizeof(UC) == 1)) {
+    if (base == 10) {
+      const size_t len = (size_t)(pend - p);
+
+      // Pack the first (up to) four characters so that character j occupies
+      // bits [8j, 8j+7] of `digits` on every host; the digit counting and the
+      // conversion below depend on that numeric layout. Positions past the
+      // end of the input stay 0, which is not a digit.
+      uint32_t digits = 0;
+      if (cpp20_and_in_constexpr() || len < 4) {
+        for (size_t j = 0; j < 4 && j < len; ++j) {
+          digits |= (uint32_t) static_cast<uint8_t>(p[j]) << (8 * j);
+        }
+      } else {
+#if FASTFLOAT_IS_BIG_ENDIAN
+        // A raw load would put the first character in the top byte here.
+        digits = (uint32_t) static_cast<uint8_t>(p[0]) |
+                 ((uint32_t) static_cast<uint8_t>(p[1]) << 8) |
+                 ((uint32_t) static_cast<uint8_t>(p[2]) << 16) |
+                 ((uint32_t) static_cast<uint8_t>(p[3]) << 24);
+#else
+        // On little-endian hosts a raw 4-byte load already has this layout.
+        memcpy(&digits, p, 4);
+#endif
+      }
+
+      // A byte above '9' or below '0' ends up with its top bit set in at
+      // least one of the two terms, while digit bytes never carry or borrow
+      // into their neighbours. The lowest flagged byte is therefore the
+      // first non-digit character.
+      uint32_t magic =
+          ((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u;
+      uint32_t tz = (uint32_t)countr_zero_32(magic); // 7, 15, 23, 31, or 32
+      uint32_t nd = (tz == 32) ? 4 : (tz >> 3);
+      nd = (uint32_t)std::min((size_t)nd, len);
+      if (nd == 0) {
+        // No digit at p (this also covers empty input): the result is 0 when
+        // has_leading_zeros is set, otherwise the input is invalid.
+        if (has_leading_zeros) {
+          value = 0;
+          answer.ec = std::errc();
+          answer.ptr = p;
+          return answer;
+        }
+        answer.ec = std::errc::invalid_argument;
+        answer.ptr = first;
+        return answer;
+      }
+      if (nd > 3) {
+        // Four or more digits cannot fit in a uint8_t. Walk to the end of the
+        // digit run so that ptr is reported past the whole number.
+        const UC *q = p + nd;
+        size_t rem = len - nd;
+        while (rem) {
+          if (*q < UC('0') || *q > UC('9'))
+            break;
+          ++q;
+          --rem;
+        }
+        answer.ec = std::errc::result_out_of_range;
+        answer.ptr = q;
+        return answer;
+      }
+
+      digits ^= 0x30303030u;     // ASCII codes -> digit values
+      digits <<= ((4 - nd) * 8); // drop the tail; last digit lands in byte 3
+
+      // Reorder the digits most-significant first (0x00HHTTUU) so a single
+      // comparison against 2,5,5 detects values above 255.
+      uint32_t check = ((digits >> 24) & 0xff) | ((digits >> 8) & 0xff00) |
+                       ((digits << 8) & 0xff0000);
+      if (check > 0x00020505) {
+        answer.ec = std::errc::result_out_of_range;
+        answer.ptr = p + nd;
+        return answer;
+      }
+      // Multiplying by 1 + (10 << 8) + (100 << 16) accumulates
+      // 100*h + 10*t + u in the top byte.
+      value = (uint8_t)((0x640a01 * digits) >> 24);
+      answer.ec = std::errc();
+      answer.ptr = p + nd;
+      return answer;
+    }
+  }
+
   uint64_t i = 0;
   if (base == 10) {
     loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
@@ -362,6 +362,52 @@ leading_zeroes(uint64_t input_num) {
 #endif
 }
 
+/* Portable count of trailing zero bits in a 32-bit value, written so it can
+ * be evaluated in C++14 constexpr contexts. Returns 32 when the input is 0. */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int
+countr_zero_generic_32(uint32_t input_num) {
+  if (input_num == 0) {
+    return 32;
+  }
+  int zeros = 0;
+  // Binary search: look at the low 16, 8, 4, 2, then 1 bit(s); whenever they
+  // are all clear, discard them and add their width to the running count.
+  for (int width = 16; width > 0; width /= 2) {
+    const uint32_t low_bits = (uint32_t(1) << width) - 1;
+    if ((input_num & low_bits) == 0) {
+      input_num >>= width;
+      zeros += width;
+    }
+  }
+  return zeros;
+}
+
+/* Number of trailing zero bits in a 32-bit value; yields 32 for an input of
+ * 0. Uses the generic helper during constant evaluation and a compiler
+ * intrinsic otherwise. */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int
+countr_zero_32(uint32_t input_num) {
+  if (cpp20_and_in_constexpr()) {
+    return countr_zero_generic_32(input_num);
+  }
+#ifdef FASTFLOAT_VISUAL_STUDIO
+  unsigned long bit_index = 0;
+  // _BitScanForward reports zero when no bit is set in the input.
+  if (_BitScanForward(&bit_index, input_num) == 0) {
+    return 32;
+  }
+  return (int)bit_index;
+#else
+  // Zero is handled separately rather than being passed to __builtin_ctz.
+  if (input_num == 0) {
+    return 32;
+  }
+  return __builtin_ctz(input_num);
+#endif
+}
+
 // slow emulation routine for 32-bit
 fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) {
   return x * (uint64_t)y;
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -94,6 +94,7 @@ endif()
 option(FASTFLOAT_EXHAUSTIVE "Exhaustive tests" OFF)
 
 if (FASTFLOAT_EXHAUSTIVE)
+  fast_float_add_cpp_test(ipv4_test)
   fast_float_add_cpp_test(short_random_string)
   fast_float_add_cpp_test(exhaustive32_midpoint)
   fast_float_add_cpp_test(random_string)
diff --git a/tests/ipv4_test.cpp b/tests/ipv4_test.cpp
@@ -0,0 +1,93 @@
+
+#include <algorithm>
+#include <charconv>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <stdexcept>
+#include "fast_float/fast_float.h"
+
+// Writes the decimal text of `value` (no leading zeros, no terminator)
+// starting at `ptr` and returns the position one past the last digit written.
+char *uint8_to_chars_manual(char *ptr, uint8_t value) {
+  // A uint8_t has at most three digits; emit them most significant first.
+  if (value >= 100) {
+    *ptr++ = '0' + (value / 100);
+  }
+  if (value >= 10) {
+    *ptr++ = '0' + ((value / 10) % 10);
+  }
+  *ptr++ = '0' + (value % 10);
+  return ptr;
+}
+
+// Formats `ip` as dotted-quad text in `buffer`, taking the most significant
+// byte as the first octet, and NUL-terminates the result.
+void uint32_to_ipv4_string(uint32_t ip, char *buffer) {
+  char *out = buffer;
+  // Walk the octets from the top byte (shift 24) down to the bottom byte.
+  for (int shift = 24; shift >= 0; shift -= 8) {
+    out = uint8_to_chars_manual(out, static_cast<uint8_t>(ip >> shift));
+    if (shift != 0) {
+      *out++ = '.';
+    }
+  }
+  *out = '\0';
+}
+
+// Parses the dotted-quad IPv4 text in [str, end) into a 32-bit value with the
+// first octet in the most significant byte. Each octet is read with
+// fast_float::from_chars into a uint8_t.
+// Throws std::invalid_argument when an octet fails to parse, a '.' separator
+// is missing, or characters remain after the fourth octet.
+fastfloat_really_inline uint32_t ipv4_string_to_uint32(const char *str,
+                                                       const char *end) {
+  uint32_t ip = 0;
+  const char *current = str;
+
+  for (int i = 0; i < 4; ++i) {
+    uint8_t value = 0;
+    auto r = fast_float::from_chars(current, end, value);
+    if (r.ec != std::errc()) {
+      throw std::invalid_argument("Invalid IP address format");
+    }
+    current = r.ptr;
+    ip = (ip << 8) | value;
+
+    // Every octet except the last must be followed by a '.' separator.
+    if (i < 3) {
+      if (current == end || *current++ != '.') {
+        throw std::invalid_argument("Invalid IP address format");
+      }
+    }
+  }
+  // The caller gets no end pointer back, so leftover input such as
+  // "1.2.3.4x" must be rejected here rather than silently ignored.
+  if (current != end) {
+    throw std::invalid_argument("Invalid IP address format");
+  }
+  return ip;
+}
+
+// Round-trips a sample of the 32-bit address space (every 1000th value,
+// starting at 0) through uint32_to_ipv4_string and ipv4_string_to_uint32.
+// Returns true when every sampled address parses back to itself. On the
+// first mismatch or parse error it reports the offending address on
+// std::cerr and returns false.
+bool test_all_ipv4_conversions() {
+  std::cout << "Testing all IPv4 conversions... 0, 1000, 2000, 3000, 4000, "
+               "5000, 6000, 7000, 8000, 9000, ..."
+            << std::endl;
+  char buffer[16]; // fits "255.255.255.255" plus the terminating NUL
+  // The counter is 64-bit so the loop ends instead of wrapping at 0xFFFFFFFF.
+  for (uint64_t ip = 0; ip <= 0xFFFFFFFF; ip += 1000) {
+    if (ip % 10000000 == 0) {
+      std::cout << "." << std::flush;
+    }
+    uint32_to_ipv4_string(static_cast<uint32_t>(ip), buffer);
+    const char *end = buffer + std::strlen(buffer);
+    uint32_t parsed_ip = 0;
+    try {
+      parsed_ip = ipv4_string_to_uint32(buffer, end);
+    } catch (const std::invalid_argument &e) {
+      // Report which address failed instead of letting the exception
+      // terminate the program without context.
+      std::cerr << "Parse failure: original " << ip << " (\"" << buffer
+                << "\"): " << e.what() << std::endl;
+      return false;
+    }
+    if (parsed_ip != ip) {
+      std::cerr << "Mismatch: original " << ip << ", parsed " << parsed_ip
+                << std::endl;
+      return false;
+    }
+  }
+  std::cout << std::endl;
+  return true;
+}
+
+// Runs the IPv4 round-trip test and maps its result to the process exit code.
+int main() {
+  if (!test_all_ipv4_conversions()) {
+    std::cerr << "IPv4 conversion test failed!" << std::endl;
+    return EXIT_FAILURE;
+  }
+  std::cout << "All IPv4 conversions passed!" << std::endl;
+  return EXIT_SUCCESS;
+}