pythongh-114667: Support hexadecimal floating point literals

skirpichev · skirpichev · commit 9af396d09bba · 2025-08-10T11:14:22.000+03:00
This adds hexadecimal floating point literals (IEEE 754-2008 §5.12.3) and
supports construction of floats from hexadecimal strings.  Note that the
syntax is more permissive: it accepts everything that ``float.fromhex()``
currently accepts, but with a mandatory base specifier; it also allows
grouping digits with underscores.

Examples:
```pycon
>>> 0x1.1p-1
0.53125
>>> float('0x1.1')
1.0625
>>> 0x1.1
1.0625
>>> 0x1.1_1_1
1.066650390625
1.066650390625
```

Added compatibility code to not break access of existing int attributes.

E.g. 0x1.bit_length() will not require parentheses around the
hexadecimal integer literal (unlike 1.bit_length() for a decimal int,
which is a syntax error without them).

Minor changes: Py_ISDIGIT/ISXDIGIT macros were transformed to functions.
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
@@ -770,7 +770,8 @@ are always available.  They are listed here in alphabetical order.
       >>> float('-Infinity')
       -inf
 
-   If the argument is a string, it should contain a decimal number, optionally
+   If the argument is a string, it should contain a decimal number
+   or a hexadecimal number, optionally
    preceded by a sign, and optionally embedded in whitespace.  The optional
    sign may be ``'+'`` or ``'-'``; a ``'+'`` sign has no effect on the value
    produced.  The argument may also be a string representing a NaN
@@ -787,12 +788,16 @@ are always available.  They are listed here in alphabetical order.
       digitpart: `digit` (["_"] `digit`)*
       number: [`digitpart`] "." `digitpart` | `digitpart` ["."]
       exponent: ("e" | "E") [`sign`] `digitpart`
-      floatnumber: `number` [`exponent`]
+      floatnumber: (`number` [`exponent`]) | `hexfloatnumber`
       absfloatvalue: `floatnumber` | `infinity` | `nan`
       floatvalue: [`sign`] `absfloatvalue`
+      hexfloatnumber: `~python-grammar:hexinteger` | `~python-grammar:hexfraction` | `~python-grammar:hexfloat`
 
    Case is not significant, so, for example, "inf", "Inf", "INFINITY", and
-   "iNfINity" are all acceptable spellings for positive infinity.
+   "iNfINity" are all acceptable spellings for positive infinity.  Note also
+   that the exponent of a hexadecimal floating point number is written in
+   decimal, and that it gives the power of 2 by which to multiply the
+   coefficient.
 
    Otherwise, if the argument is an integer or a floating-point number, a
    floating-point number with the same value (within Python's floating-point
@@ -818,6 +823,9 @@ are always available.  They are listed here in alphabetical order.
    .. versionchanged:: 3.8
       Falls back to :meth:`~object.__index__` if :meth:`~object.__float__` is not defined.
 
+   .. versionchanged:: next
+      Added support for hexadecimal floating-point numbers.
+
 
 .. index::
    single: __format__
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
@@ -1265,6 +1265,9 @@ The ``e`` or ``E`` represents "times ten raised to the power of"::
    1.166e-5  # (represents 1.166×10⁻⁵, or 0.00001166)
    6.02214076e+23  # (represents 6.02214076×10²³, or 602214076000000000000000.)
 
+The exponent of a hexadecimal floating point literal is written in decimal, and
+it gives the power of 2 by which to multiply the coefficient.
+
 In floats with only integer and exponent parts, the decimal point may be
 omitted::
 
@@ -1281,12 +1284,21 @@ lexical definitions:
       | `digitpart` "." [`digitpart`] [`exponent`]
       | "." `digitpart` [`exponent`]
       | `digitpart` `exponent`
+      | `hexfloat`
    digitpart: `digit` (["_"] `digit`)*
    exponent:  ("e" | "E") ["+" | "-"] `digitpart`
+   hexfloat: ("0x" | "0X") ["_"] (`hexdigitpart` | `hexpointfloat`) [`binexponent`]
+   hexpointfloat: [`hexdigitpart`] `hexfraction` | `hexdigitpart` "."
+   hexfraction: "." `hexdigitpart`
+   hexdigitpart: `hexdigit` (["_"] `hexdigit`)*
+   binexponent: ("p" | "P") ["+" | "-"] `digitpart`
 
 .. versionchanged:: 3.6
    Underscores are now allowed for grouping purposes in literals.
 
+.. versionchanged:: next
+   Added support for hexadecimal floating-point literals.
+
 
 .. index::
    single: j; in numeric literal
diff --git a/Doc/tutorial/floatingpoint.rst b/Doc/tutorial/floatingpoint.rst
@@ -210,7 +210,7 @@ the float value exactly:
 
 .. doctest::
 
-    >>> x == float.fromhex('0x1.921f9f01b866ep+1')
+    >>> x == 0x1.921f9f01b866ep+1
     True
 
 Since the representation is exact, it is useful for reliably porting values
diff --git a/Include/cpython/pyctype.h b/Include/cpython/pyctype.h
@@ -21,11 +21,17 @@ PyAPI_DATA(const unsigned int) _Py_ctype_table[256];
 #define Py_ISLOWER(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_LOWER)
 #define Py_ISUPPER(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_UPPER)
 #define Py_ISALPHA(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALPHA)
-#define Py_ISDIGIT(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_DIGIT)
-#define Py_ISXDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_XDIGIT)
 #define Py_ISALNUM(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALNUM)
 #define Py_ISSPACE(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_SPACE)
 
+static inline int Py_ISDIGIT(char c) {
+    return _Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_DIGIT;
+}
+
+static inline int Py_ISXDIGIT(char c) {
+    return _Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_XDIGIT;
+}
+
 PyAPI_DATA(const unsigned char) _Py_ctype_tolower[256];
 PyAPI_DATA(const unsigned char) _Py_ctype_toupper[256];
 
diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h
@@ -42,6 +42,7 @@ extern double _Py_parse_inf_or_nan(const char *p, char **endptr);
 
 extern int _Py_convert_int_to_double(PyObject **v, double *dbl);
 
+extern double _Py_dg_strtod_hex(const char *str, char **ptr);
 
 #ifdef __cplusplus
 }
diff --git a/Lib/test/support/numbers.py b/Lib/test/support/numbers.py
@@ -24,6 +24,16 @@
     '.1_4j',
     '(1_2.5+3_3j)',
     '(.5_6j)',
+    '0x_.1p1',
+    '0X_.1p1',
+    '0x1_1.p1',
+    '0x_1_1.p1',
+    '0x1.1_1p1',
+    '0x1.p1_1',
+    '0xa.p1',
+    '0x.ap1',
+    '0xa_c.p1',
+    '0x.a_cp1',
 ]
 INVALID_UNDERSCORE_LITERALS = [
     # Trailing underscores:
@@ -35,6 +45,8 @@
     '0xf_',
     '0o5_',
     '0 if 1_Else 1',
+    '0x1p1_',
+    '0x1.1p1_',
     # Underscores in the base selector:
     '0_b0',
     '0_xf',
@@ -52,28 +64,39 @@
     '0o5__77',
     '1e1__0',
     '1e1__0j',
+    '0x1__1.1p1',
     # Underscore right before a dot:
     '1_.4',
     '1_.4j',
+    '0x1_.p1',
+    '0xa_.p1',
     # Underscore right after a dot:
     '1._4',
     '1._4j',
     '._5',
     '._5j',
+    '0x1._p1',
+    '0xa._p1',
     # Underscore right after a sign:
     '1.0e+_1',
     '1.0e+_1j',
+    '0x1.1p+_1',
     # Underscore right before j:
     '1.4_j',
     '1.4e5_j',
     # Underscore right before e:
     '1_e1',
     '1.4_e1',
     '1.4_e1j',
-    # Underscore right after e:
+    '0x1.1p1_j',
+    # Underscore right after e or p:
     '1e_1',
     '1.4e_1',
     '1.4e_1j',
+    '0x1_p1',
+    '0x1_P1',
+    '0x1.1_p1',
+    '0x1.1_P1',
     # Complex cases with parens:
     '(1+1.5_j_)',
     '(1+1.5_j)',
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py
@@ -63,9 +63,9 @@ def test_float(self):
         self.assertEqual(float(3.14), 3.14)
         self.assertEqual(float(314), 314.0)
         self.assertEqual(float("  3.14  "), 3.14)
-        self.assertRaises(ValueError, float, "  0x3.1  ")
-        self.assertRaises(ValueError, float, "  -0x3.p-1  ")
-        self.assertRaises(ValueError, float, "  +0x3.p-1  ")
+        self.assertEqual(float("  0x3.1  "), 3.0625)
+        self.assertEqual(float("  -0x3.p-1  "), -1.5)
+        self.assertEqual(float("  +0x3.p-1  "), 1.5)
         self.assertRaises(ValueError, float, "++3.14")
         self.assertRaises(ValueError, float, "+-3.14")
         self.assertRaises(ValueError, float, "-+3.14")
@@ -95,13 +95,13 @@ def test_noargs(self):
 
     def test_underscores(self):
         for lit in VALID_UNDERSCORE_LITERALS:
-            if not any(ch in lit for ch in 'jJxXoObB'):
+            if not any(ch in lit for ch in 'jJoObB'):
                 self.assertEqual(float(lit), eval(lit))
                 self.assertEqual(float(lit), float(lit.replace('_', '')))
         for lit in INVALID_UNDERSCORE_LITERALS:
             if lit in ('0_7', '09_99'):  # octals are not recognized here
                 continue
-            if not any(ch in lit for ch in 'jJxXoObB'):
+            if not any(ch in lit for ch in 'jJoObB'):
                 self.assertRaises(ValueError, float, lit)
         # Additional test cases; nan and inf are never valid as literals,
         # only in the float() constructor, but we don't allow underscores
@@ -198,9 +198,9 @@ def test_float_with_comma(self):
         self.assertRaises(ValueError, float, "  3,14  ")
         self.assertRaises(ValueError, float, "  +3,14  ")
         self.assertRaises(ValueError, float, "  -3,14  ")
-        self.assertRaises(ValueError, float, "  0x3.1  ")
-        self.assertRaises(ValueError, float, "  -0x3.p-1  ")
-        self.assertRaises(ValueError, float, "  +0x3.p-1  ")
+        self.assertEqual(float("  0x3.1  "), 3.0625)
+        self.assertEqual(float("  -0x3.p-1  "), -1.5)
+        self.assertEqual(float("  +0x3.p-1  "), 1.5)
         self.assertEqual(float("  25.e-1  "), 2.5)
         self.assertAlmostEqual(float("  .25e-1  "), .025)
 
@@ -1559,7 +1559,7 @@ def roundtrip(x):
             except OverflowError:
                 pass
             else:
-                self.identical(x, fromHex(toHex(x)))
+                self.identical(x, roundtrip(x))
 
     def test_subclass(self):
         class F(float):
diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
@@ -74,6 +74,15 @@ def test_plain_integers(self):
         else:
             self.fail('Weird maxsize value %r' % maxsize)
 
+    def test_attrs_on_hexintegers(self):
+        good_meth = [m for m in dir(int) if not m.startswith('_')]
+        for m in good_meth:
+            self.assertEqual(eval('0x1.' + m), eval('(0x1).' + m))
+        self.check_syntax_error('0x1.spam', "invalid hexadecimal literal",
+                                lineno=1, offset=4)
+        self.check_syntax_error('0x1.foo', "invalid hexadecimal literal",
+                                lineno=1, offset=5)
+
     def test_long_integers(self):
         x = 0
         x = 0xffffffffffffffff
@@ -97,6 +106,23 @@ def test_floats(self):
         x = 3.e14
         x = .3e14
         x = 3.1e4
+        x = 0x1.2p1
+        x = 0x1.2p+1
+        x = 0x1.p1
+        x = 0x1.p-1
+        x = 0x1p0
+        x = 0x1ap1
+        x = 0x1P1
+        x = 0x1cp2
+        x = 0x1.p1
+        x = 0x1.P1
+        x = 0x001.1p2
+        x = 0X1p1
+        x = 0x1.1_1p1
+        x = 0x1.1p1_1
+        x = 0x1.
+        x = 0x1.1
+        x = 0x.1
 
     def test_float_exponent_tokenization(self):
         # See issue 21642.
@@ -134,7 +160,14 @@ def test_bad_numerical_literals(self):
               "use an 0o prefix for octal integers")
         check("1.2_", "invalid decimal literal")
         check("1e2_", "invalid decimal literal")
-        check("1e+", "invalid decimal literal")
+        check("1e+", "invalid float literal")
+        check("0x.p", "invalid float literal")
+        check("0x_.p", "invalid float literal")
+        check("0x1.1p", "invalid float literal")
+        check("0x1.1_p", "invalid float literal")
+        check("0x1.1p_", "invalid float literal")
+        check("0xp", "invalid hexadecimal literal")
+        check("0xP", "invalid hexadecimal literal")
 
     def test_end_of_numerical_literals(self):
         def check(test, error=False):
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
@@ -273,6 +273,16 @@ def test_float(self):
     NAME       'x'           (1, 0) (1, 1)
     OP         '='           (1, 2) (1, 3)
     NUMBER     '3.14e159'    (1, 4) (1, 12)
+    """)
+        self.check_tokenize("x = 0x1p1", """\
+    NAME       'x'           (1, 0) (1, 1)
+    OP         '='           (1, 2) (1, 3)
+    NUMBER     '0x1p1'       (1, 4) (1, 9)
+    """)
+        self.check_tokenize("x = 0x.1p1", """\
+    NAME       'x'           (1, 0) (1, 1)
+    OP         '='           (1, 2) (1, 3)
+    NUMBER     '0x.1p1'      (1, 4) (1, 10)
     """)
 
     def test_underscore_literals(self):
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
@@ -77,7 +77,10 @@ def maybe(*choices): return group(*choices) + '?'
 Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
                    r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
 Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
-Floatnumber = group(Pointfloat, Expfloat)
+HexExponent = r'[pP][-+]?[0-9](?:_?[0-9])*'
+Hexfloat = group(r'0[xX]_?[0-9a-f](?:_?[0-9a-f])*\.(?:[0-9a-f](?:_?[0-9a-f])*)?',
+                 r'0[xX]_?\.[0-9a-f](?:_?[0-9a-f])*') + HexExponent
+Floatnumber = group(Pointfloat, Expfloat, Hexfloat)
 Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
 Number = group(Imagnumber, Floatnumber, Intnumber)
 
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-01-28-08-17-08.gh-issue-114667.8w_l9I.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-01-28-08-17-08.gh-issue-114667.8w_l9I.rst
@@ -0,0 +1,3 @@
+Add hexadecimal floating point literals (IEEE 754-2008 §5.12.3) and support
+construction of floats from hexadecimal strings.  Patch by Sergey B
+Kirpichev.
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
diff --git a/Python/dtoa.c b/Python/dtoa.c
diff --git a/Python/pystrtod.c b/Python/pystrtod.c

Original file line number	Diff line number	Diff line change
`@@ -42,6 +42,7 @@ extern double _Py_parse_inf_or_nan(const char *p, char **endptr);`
`42`	`42`
`43`	`43`	`extern int _Py_convert_int_to_double(PyObject **v, double *dbl);`
`44`	`44`
	`45`	`+extern double _Py_dg_strtod_hex(const char *str, char **ptr);`
`45`	`46`
`46`	`47`	`#ifdef __cplusplus`
`47`	`48`	`}`