python · skirpichev · Dec 28, 2023 · Jan 12, 2024 · Jan 16, 2024 · Feb 20, 2024
diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst
@@ -124,7 +124,7 @@ The following functions provide locale-independent string to number conversions.
    *format_code*, *precision*, and *flags*.
 
    *format_code* must be one of ``'e'``, ``'E'``, ``'f'``, ``'F'``,
-   ``'g'``, ``'G'`` or ``'r'``.  For ``'r'``, the supplied *precision*
+   ``'g'``, ``'G'``, ``'x'``, ``'X'`` or ``'r'``.  For ``'r'``, the supplied *precision*
    must be 0 and is ignored.  The ``'r'`` format code specifies the
    standard :func:`repr` format.
 
@@ -151,6 +151,9 @@ The following functions provide locale-independent string to number conversions.
 
    .. versionadded:: 3.1
 
+   .. versionchanged:: 3.14
+      Support ``'x'`` and ``'X'`` format types for :class:`float`.
+
 
 .. c:function:: int PyOS_stricmp(const char *s1, const char *s2)
 

diff --git a/Doc/library/string.rst b/Doc/library/string.rst
@@ -588,6 +588,30 @@ The available presentation types for :class:`float` and
    |         | as altered by the other format modifiers.                |
    +---------+----------------------------------------------------------+
 
+Additionally, the following presentation types are available for :class:`float`:
+
+   +---------+----------------------------------------------------------+
+   | Type    | Meaning                                                  |
+   +=========+==========================================================+
+   | ``'x'`` | Represent the number by a hexadecimal string in the      |
+   |         | form ``[±][0x]h[.hhh]p±d``, where there is one           |
+   |         | hexadecimal digit before the dot and the fractional part |
+   |         | either is exact or the number of its hexadecimal digits  |
+   |         | is equal to the specified precision.  The exponent ``d`` |
+   |         | is written in decimal, it always contains at least one   |
+   |         | digit, and it gives the power of 2 by which to multiply  |
+   |         | the coefficient.                                         |
+   |         |                                                          |
+   |         | If the ``'#'`` option is specified, the prefix ``'0x'``  |
+   |         | will be inserted before the integer part.                |
+   +---------+----------------------------------------------------------+
+   | ``'X'`` | Same as ``'x'``, but uses uppercase digits, the ``0X``   |
+   |         | prefix and ``'P'`` as the exponent separator.            |
+   +---------+----------------------------------------------------------+
+
+.. versionchanged:: 3.14
+   Support ``'x'`` and ``'X'`` format types for :class:`float`.
+
 
 .. _formatexamples:
 

diff --git a/Include/codecs.h b/Include/codecs.h
@@ -168,6 +168,7 @@ PyAPI_FUNC(PyObject *) PyCodec_NameReplaceErrors(PyObject *exc);
 
 #ifndef Py_LIMITED_API
 PyAPI_DATA(const char *) Py_hexdigits;
+PyAPI_DATA(const char *) Py_hexdigits_upper;
 #endif
 
 #ifdef __cplusplus

diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h
@@ -55,6 +55,8 @@ extern PyObject* _Py_string_to_number_with_underscores(
 
 extern double _Py_parse_inf_or_nan(const char *p, char **endptr);
 
+extern char * _Py_dg_dtoa_hex(double x, int precision, int always_add_sign,
+                              int use_alt_formatting, int upper, int float_hex);
 
 #ifdef __cplusplus
 }

diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py
@@ -700,12 +700,37 @@ def test_format(self):
         # % formatting
         self.assertEqual(format(-1.0, '%'), '-100.000000%')
 
+        # hexadecimal format
+        x = float.fromhex('0x0.0030p+0')
+        self.assertEqual(format(x, 'x'), '1.8p-11')
+        self.assertEqual(format(x, 'X'), '1.8P-11')
+        self.assertEqual(format(x, '.0x'), '1p-10')
+        x = float.fromhex('0x1.7p+0')
+        self.assertEqual(format(x, '.0x'), '1p+0')
+        x = float.fromhex('0x0.1p-1022')  # subnormal
+        self.assertEqual(format(x, 'x'), '0.1p-1022')
+        x = float.fromhex('0x0.0040p+0')
+        self.assertEqual(format(x, 'x'), '1p-10')
+        self.assertEqual(format(x, '>10x'),   '     1p-10')
+        self.assertEqual(format(x, '>#10x'),  '   0x1p-10')
+        self.assertEqual(format(x, '>010x'),  '000001p-10')
+        self.assertEqual(format(x, '>#010x'), '0000x1p-10')
+        self.assertEqual(format(x, '#010x'),  '0x0001p-10')
+        self.assertEqual(format(x, '<10x'),   '1p-10     ')
+        self.assertEqual(format(x, '<#10x'),  '0x1p-10   ')
+        x = float.fromhex('0x1.fe12p0')
+        self.assertEqual(format(x, 'x'), '1.fe12p+0')
+        self.assertEqual(format(x, '#X'), '0X1.FE12P+0')
+        self.assertEqual(format(x, '.3x'), '1.fe1p+0')
+        self.assertEqual(format(x, '.1x'), '1.0p+1')
+        self.assertEqual(format(x, '#.1x'), '0x1.0p+1')
+
         # conversion to string should fail
         self.assertRaises(ValueError, format, 3.0, "s")
 
-        # confirm format options expected to fail on floats, such as integer
-        # presentation types
-        for format_spec in 'sbcdoxX':
+        # confirm format options expected to fail on floats, such as some
+        # integer presentation types
+        for format_spec in 'sbcdo':
             self.assertRaises(ValueError, format, 0.0, format_spec)
             self.assertRaises(ValueError, format, 1.0, format_spec)
             self.assertRaises(ValueError, format, -1.0, format_spec)
@@ -1472,7 +1497,7 @@ def roundtrip(x):
             self.identical(x, roundtrip(x))
             self.identical(-x, roundtrip(-x))
 
-        # fromHex(toHex(x)) should exactly recover x, for any non-NaN float x.
+        # roundtrip(x) should exactly recover x, for any non-NaN float x.
         import random
         for i in range(10000):
             e = random.randrange(-1200, 1200)
@@ -1483,7 +1508,7 @@ def roundtrip(x):
             except OverflowError:
                 pass
             else:
-                self.identical(x, fromHex(toHex(x)))
+                self.identical(x, roundtrip(x))
 
     def test_subclass(self):
         class F(float):

diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py
@@ -1322,9 +1322,7 @@ def __repr__(self):
 
         # test number formatter errors:
         self.assertRaises(ValueError, '{0:x}'.format, 1j)
-        self.assertRaises(ValueError, '{0:x}'.format, 1.0)
         self.assertRaises(ValueError, '{0:X}'.format, 1j)
-        self.assertRaises(ValueError, '{0:X}'.format, 1.0)
         self.assertRaises(ValueError, '{0:o}'.format, 1j)
         self.assertRaises(ValueError, '{0:o}'.format, 1.0)
         self.assertRaises(ValueError, '{0:u}'.format, 1j)

@@ -525,9 +525,9 @@ def test(f, format_spec, result):
         self.assertRaises(TypeError, 3.0.__format__, None)
         self.assertRaises(TypeError, 3.0.__format__, 0)
 
-        # confirm format options expected to fail on floats, such as integer
-        # presentation types
-        for format_spec in 'sbcdoxX':
+        # confirm format options expected to fail on floats, such as some
+        # integer presentation types
+        for format_spec in 'sbcdo':
             self.assertRaises(ValueError, format, 0.0, format_spec)
             self.assertRaises(ValueError, format, 1.0, format_spec)
             self.assertRaises(ValueError, format, -1.0, format_spec)

diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-29-05-06-03.gh-issue-113805.5oGkdZ.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-29-05-06-03.gh-issue-113805.5oGkdZ.rst
@@ -0,0 +1,2 @@
+Support formatting floats (using "x" and "X" format types) in hexadecimal
+notation, like ``0x1.2efp-2``.  Patch by Sergey B Kirpichev.
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
@@ -1134,14 +1134,7 @@ float_conjugate_impl(PyObject *self)
     return float_float(self);
 }
 
-/* turn ASCII hex characters into integer values and vice versa */
-
-static char
-char_from_hex(int x)
-{
-    assert(0 <= x && x < 16);
-    return Py_hexdigits[x];
-}
+/* turn ASCII hex characters into integer values */
 
 static int
 hex_from_char(char c) {
@@ -1208,11 +1201,136 @@ hex_from_char(char c) {
     return x;
 }
 
-/* convert a float to a hexadecimal string */
+/* Convert a float to a hexadecimal string [±][0x]h[.hhhhhhhh]p±d,
+   where the fractional part either is exact (precision < 0) or the
+   number of digits after the dot is equal to the precision.
+
+   The return value is a pointer to a buffer with the converted string, or
+   NULL if memory allocation failed (no exception is set).  The caller is
+   responsible for freeing the returned string by calling PyMem_Free().
+
+   The exponent d is written in decimal, it always contains at least one digit,
+   and it gives the power of 2 by which to multiply the coefficient.
+
+   x - the double to be converted (float.hex() filters NaN/inf before calling)
+   precision - number of hex digits after the dot; negative means exact
+   always_add_sign - nonzero if a '+' is emitted when the sign bit of x is clear
+   use_alt_formatting - nonzero if the hexadecimal prefix should be added.
+   upper - nonzero, if uppercase letters should be used for hexadecimal
+           numbers, prefix and the exponent separator.
+   float_hex - nonzero for float.hex() style: trailing zeros are not stripped.
+ */
+
+char *
+_Py_dg_dtoa_hex(double x, int precision, int always_add_sign,
+                int use_alt_formatting, int upper, int float_hex)
+{
+    int e;
+    double m = frexp(fabs(x), &e);
+
+    int autoprec = precision < 0;
+    if (autoprec) {
+        /* DBL_MANT_DIG rounded up to the next integer of the form 4k+1 */
+        const double tohex_nbits = DBL_MANT_DIG + 3 - (DBL_MANT_DIG+2)%4;
+        precision = (int) (tohex_nbits - 1)/4;
+        if (!x && float_hex) {
+            /* for compatibility with float.hex(), we keep just one
+               digit of zero */
+            precision = 1;
+        }
+    }
+
+    /* normalize: leading digit becomes 1, or 0 for subnormals */
+    if (m) {
+        int shift = 1 - Py_MAX(DBL_MIN_EXP - e, 0);
+        m = ldexp(m, shift);
+        e -= shift;
+    }
+
+    /* round (half up) to precision hex digits, renormalizing on carry */
+    if (!autoprec) {
+        do {
+            double frac = ldexp(m, 4*precision);
+            frac -= floor(frac);
+            frac *= 16.0;
+            if (frac >= 8.0) {
+                m += ldexp(1.0, -4*precision);
+            }
+            if ((int)(m) & 0x2) {
+                m /= 2.0;
+                e += 1;
+            }
+            else {
+                break;
+            }
+        } while (1);
+    }
+
+    /* Conservative estimation for number of digits in the exponent.
+       IEEE quadruple precision floats should fit. */
+    const size_t exp_len = 5;
+
+    /* Allocate space for [±][0x]  h[.] [hhhhhhhh]   p±  d        '\0' */
+    size_t size =          1     + 2 +   precision + 2 + exp_len + 1;
+    if (use_alt_formatting) {
+        size += 2;
+    }
+    char *s = PyMem_Malloc(size);
+    if (!s) {
+        return NULL;
+    }
+
+    /* sign and prefix */
+    size_t si = 0;
+    if (copysign(1.0, x) == -1.0) {
+        s[si] = '-';
+        si++;
+    }
+    else if (always_add_sign) {
+        s[si] = '+';
+        si++;
+    }
+    if (use_alt_formatting) {
+        s[si] = '0';
+        si++;
+        s[si] = upper ? 'X' : 'x';
+        si++;
+    }
+
+    /* mantissa: leading digit, dot, then precision fractional digits */
+    const char *hexmap = upper ? Py_hexdigits_upper : Py_hexdigits;
+    assert(0 <= (int)m && (int)m < 16);
+    s[si] = hexmap[(int)m];
+    si++;
+    m -= (int)m;
+    s[si] = '.';
+    for (int i = 0; i < precision; i++) {
+        si++;
+        m *= 16.0;
+        assert(0 <= (int)m && (int)m < 16);
+        s[si] = hexmap[(int)m];
+        m -= (int)m;
+    }
+
+    /* strip trailing zeros of the fraction (exact mode, not float.hex()) */
+    if (autoprec && !float_hex) {
+        while (s[si] == '0') {
+            si--;
+        }
+    }
 
-/* TOHEX_NBITS is DBL_MANT_DIG rounded up to the next integer
-   of the form 4k+1. */
-#define TOHEX_NBITS DBL_MANT_DIG + 3 - (DBL_MANT_DIG+2)%4
+    /* advance past the last digit; a trailing dot is overwritten below */
+    if (s[si] != '.') {
+        si++;
+    }
+
+    /* exponent: separator, then signed decimal; snprintf adds the NUL */
+    s[si] = upper ? 'P' : 'p';
+    si++;
+    si += snprintf(s + si, exp_len + 2, "%+d", e) + 1;
+
+    return s;
+}
 
 /*[clinic input]
 float.hex
@@ -1229,54 +1347,24 @@ static PyObject *
 float_hex_impl(PyObject *self)
 /*[clinic end generated code: output=0ebc9836e4d302d4 input=bec1271a33d47e67]*/
 {
-    double x, m;
-    int e, shift, i, si, esign;
-    /* Space for 1+(TOHEX_NBITS-1)/4 digits, a decimal point, and the
-       trailing NUL byte. */
-    char s[(TOHEX_NBITS-1)/4+3];
-
-    CONVERT_TO_DOUBLE(self, x);
+    PyObject *result = NULL;
+    double x = PyFloat_AS_DOUBLE(self);
 
-    if (isnan(x) || isinf(x))
+    if (isnan(x) || isinf(x)) {
         return float_repr((PyFloatObject *)self);
-
-    if (x == 0.0) {
-        if (copysign(1.0, x) == -1.0)
-            return PyUnicode_FromString("-0x0.0p+0");
-        else
-            return PyUnicode_FromString("0x0.0p+0");
     }
 
-    m = frexp(fabs(x), &e);
-    shift = 1 - Py_MAX(DBL_MIN_EXP - e, 0);
-    m = ldexp(m, shift);
-    e -= shift;
+    char *buf = _Py_dg_dtoa_hex(x, -1, 0, 1, 0, 1);
 
-    si = 0;
-    s[si] = char_from_hex((int)m);
-    si++;
-    m -= (int)m;
-    s[si] = '.';
-    si++;
-    for (i=0; i < (TOHEX_NBITS-1)/4; i++) {
-        m *= 16.0;
-        s[si] = char_from_hex((int)m);
-        si++;
-        m -= (int)m;
+    if (buf) {
+        result = PyUnicode_FromString(buf);
+        PyMem_Free(buf);
     }
-    s[si] = '\0';
-
-    if (e < 0) {
-        esign = (int)'-';
-        e = -e;
+    else {
+        PyErr_NoMemory();
     }
-    else
-        esign = (int)'+';
 
-    if (x < 0.0)
-        return PyUnicode_FromFormat("-0x%sp%c%d", s, esign, e);
-    else
-        return PyUnicode_FromFormat("0x%sp%c%d", s, esign, e);
+    return result;
 }
 
 /* Convert a hexadecimal string to a float. */

diff --git a/Python/codecs.c b/Python/codecs.c
@@ -17,6 +17,7 @@ Copyright (c) Corporation for National Research Initiatives.
 #include "pycore_ucnhash.h"       // _PyUnicode_Name_CAPI
 
 const char *Py_hexdigits = "0123456789abcdef";
+const char *Py_hexdigits_upper = "0123456789ABCDEF";
 
 /* --- Codec Registry ----------------------------------------------------- */
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Support formatting floats (using "x" and "X" format types) in hexadecimal
		notation, like ``0x1.2efp-2``. Patch by Sergey B Kirpichev.