From 9af396d09bba5d1c96e59036982059d53e95727b Mon Sep 17 00:00:00 2001
From: Sergey B Kirpichev <skirpichev@gmail.com>
Date: Sun, 7 Jan 2024 07:52:57 +0300
Subject: [PATCH 1/2] gh-114667: Support hexadecimal floating point literals
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds hexadecimal floating point literals (IEEE 754-2008 §5.12.3) and
supports construction of floats from hexadecimal strings.  Note that the
syntax is more permissive: it accepts everything that is currently
accepted by ``float.fromhex()``, but with a mandatory base specifier; it
also allows grouping digits with underscores.

Examples:
```pycon
>>> 0x1.1p-1
0.53125
>>> float('0x1.1')
1.0625
>>> 0x1.1
1.0625
>>> 0x1.1_1_1
1.066650390625
```

Added compatibility code to not break access of existing int attributes.

E.g. 0x1.bit_length() will still not require parentheses around the
hexadecimal integer literal (unlike 1.bit_length() for a decimal int).

Minor changes: Py_ISDIGIT/ISXDIGIT macros were transformed to functions.
---
 Doc/library/functions.rst                     |  14 ++-
 Doc/reference/lexical_analysis.rst            |  12 +++
 Doc/tutorial/floatingpoint.rst                |   2 +-
 Include/cpython/pyctype.h                     |  10 +-
 Include/internal/pycore_floatobject.h         |   1 +
 Lib/test/support/numbers.py                   |  25 ++++-
 Lib/test/test_float.py                        |  18 ++--
 Lib/test/test_grammar.py                      |  35 +++++-
 Lib/test/test_tokenize.py                     |  10 ++
 Lib/tokenize.py                               |   5 +-
 ...-01-28-08-17-08.gh-issue-114667.8w_l9I.rst |   3 +
 Objects/floatobject.c                         | 100 ++++++++++++------
 Parser/lexer/lexer.c                          |  83 +++++++++++----
 Python/dtoa.c                                 |   4 +
 Python/pystrtod.c                             |  39 ++++---
 15 files changed, 277 insertions(+), 84 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-01-28-08-17-08.gh-issue-114667.8w_l9I.rst

diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
index 857b40f3ba155c..2cf9e286147235 100644
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -770,7 +770,8 @@ are always available.  They are listed here in alphabetical order.
       >>> float('-Infinity')
       -inf
 
-   If the argument is a string, it should contain a decimal number, optionally
+   If the argument is a string, it should contain a decimal number
+   or a hexadecimal number, optionally
    preceded by a sign, and optionally embedded in whitespace.  The optional
    sign may be ``'+'`` or ``'-'``; a ``'+'`` sign has no effect on the value
    produced.  The argument may also be a string representing a NaN
@@ -787,12 +788,16 @@ are always available.  They are listed here in alphabetical order.
       digitpart: `digit` (["_"] `digit`)*
       number: [`digitpart`] "." `digitpart` | `digitpart` ["."]
       exponent: ("e" | "E") [`sign`] `digitpart`
-      floatnumber: `number` [`exponent`]
+      floatnumber: (`number` [`exponent`]) | `hexfloatnumber`
       absfloatvalue: `floatnumber` | `infinity` | `nan`
       floatvalue: [`sign`] `absfloatvalue`
+      hexfloatnumber: `~python-grammar:hexinteger` | `~python-grammar:hexfraction` | `~python-grammar:hexfloat`
 
    Case is not significant, so, for example, "inf", "Inf", "INFINITY", and
-   "iNfINity" are all acceptable spellings for positive infinity.
+   "iNfINity" are all acceptable spellings for positive infinity.  Note also
+   that the exponent of a hexadecimal floating point number is written in
+   decimal, and that it gives the power of 2 by which to multiply the
+   coefficient.
 
    Otherwise, if the argument is an integer or a floating-point number, a
    floating-point number with the same value (within Python's floating-point
@@ -818,6 +823,9 @@ are always available.  They are listed here in alphabetical order.
    .. versionchanged:: 3.8
       Falls back to :meth:`~object.__index__` if :meth:`~object.__float__` is not defined.
 
+   .. versionchanged:: next
+      Added support for hexadecimal floating-point numbers.
+
 
 .. index::
    single: __format__
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
index e320eedfa67a27..f69bf024c4411e 100644
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -1265,6 +1265,9 @@ The ``e`` or ``E`` represents "times ten raised to the power of"::
    1.166e-5  # (represents 1.166×10⁻⁵, or 0.00001166)
    6.02214076e+23  # (represents 6.02214076×10²³, or 602214076000000000000000.)
 
+The exponent of a hexadecimal floating point literal is written in decimal, and
+it gives the power of 2 by which to multiply the coefficient.
+
 In floats with only integer and exponent parts, the decimal point may be
 omitted::
 
@@ -1281,12 +1284,21 @@ lexical definitions:
       | `digitpart` "." [`digitpart`] [`exponent`]
       | "." `digitpart` [`exponent`]
       | `digitpart` `exponent`
+      | `hexfloat`
    digitpart: `digit` (["_"] `digit`)*
    exponent:  ("e" | "E") ["+" | "-"] `digitpart`
+   hexfloat: ("0x" | "0X") ["_"] (`hexdigitpart` | `hexpointfloat`) [`binexponent`]
+   hexpointfloat: [`hexdigitpart`] `hexfraction` | `hexdigitpart` "."
+   hexfraction: "." `hexdigitpart`
+   hexdigitpart: `hexdigit` (["_"] `hexdigit`)*
+   binexponent: ("p" | "P") ["+" | "-"] `digitpart`
 
 .. versionchanged:: 3.6
    Underscores are now allowed for grouping purposes in literals.
 
+.. versionchanged:: next
+   Added support for hexadecimal floating-point literals.
+
 
 .. index::
    single: j; in numeric literal
diff --git a/Doc/tutorial/floatingpoint.rst b/Doc/tutorial/floatingpoint.rst
index dfe2d1d3a8378f..44baeee12d1165 100644
--- a/Doc/tutorial/floatingpoint.rst
+++ b/Doc/tutorial/floatingpoint.rst
@@ -210,7 +210,7 @@ the float value exactly:
 
 .. doctest::
 
-    >>> x == float.fromhex('0x1.921f9f01b866ep+1')
+    >>> x == 0x1.921f9f01b866ep+1
     True
 
 Since the representation is exact, it is useful for reliably porting values
diff --git a/Include/cpython/pyctype.h b/Include/cpython/pyctype.h
index 729d93275e6c53..71c870080fe5ad 100644
--- a/Include/cpython/pyctype.h
+++ b/Include/cpython/pyctype.h
@@ -21,11 +21,17 @@ PyAPI_DATA(const unsigned int) _Py_ctype_table[256];
 #define Py_ISLOWER(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_LOWER)
 #define Py_ISUPPER(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_UPPER)
 #define Py_ISALPHA(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALPHA)
-#define Py_ISDIGIT(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_DIGIT)
-#define Py_ISXDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_XDIGIT)
 #define Py_ISALNUM(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALNUM)
 #define Py_ISSPACE(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_SPACE)
 
+static inline int Py_ISDIGIT(char c) {
+    return _Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_DIGIT;
+}
+
+static inline int Py_ISXDIGIT(char c) {
+    return _Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_XDIGIT;
+}
+
 PyAPI_DATA(const unsigned char) _Py_ctype_tolower[256];
 PyAPI_DATA(const unsigned char) _Py_ctype_toupper[256];
 
diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h
index 317f984188bad8..0e513cc897f906 100644
--- a/Include/internal/pycore_floatobject.h
+++ b/Include/internal/pycore_floatobject.h
@@ -42,6 +42,7 @@ extern double _Py_parse_inf_or_nan(const char *p, char **endptr);
 
 extern int _Py_convert_int_to_double(PyObject **v, double *dbl);
 
+extern double _Py_dg_strtod_hex(const char *str, char **ptr);
 
 #ifdef __cplusplus
 }
diff --git a/Lib/test/support/numbers.py b/Lib/test/support/numbers.py
index d5dbb41acebc38..ce7bde75495b9f 100644
--- a/Lib/test/support/numbers.py
+++ b/Lib/test/support/numbers.py
@@ -24,6 +24,16 @@
     '.1_4j',
     '(1_2.5+3_3j)',
     '(.5_6j)',
+    '0x_.1p1',
+    '0X_.1p1',
+    '0x1_1.p1',
+    '0x_1_1.p1',
+    '0x1.1_1p1',
+    '0x1.p1_1',
+    '0xa.p1',
+    '0x.ap1',
+    '0xa_c.p1',
+    '0x.a_cp1',
 ]
 INVALID_UNDERSCORE_LITERALS = [
     # Trailing underscores:
@@ -35,6 +45,8 @@
     '0xf_',
     '0o5_',
     '0 if 1_Else 1',
+    '0x1p1_',
+    '0x1.1p1_',
     # Underscores in the base selector:
     '0_b0',
     '0_xf',
@@ -52,17 +64,23 @@
     '0o5__77',
     '1e1__0',
     '1e1__0j',
+    '0x1__1.1p1',
     # Underscore right before a dot:
     '1_.4',
     '1_.4j',
+    '0x1_.p1',
+    '0xa_.p1',
     # Underscore right after a dot:
     '1._4',
     '1._4j',
     '._5',
     '._5j',
+    '0x1._p1',
+    '0xa._p1',
     # Underscore right after a sign:
     '1.0e+_1',
     '1.0e+_1j',
+    '0x1.1p+_1',
     # Underscore right before j:
     '1.4_j',
     '1.4e5_j',
@@ -70,10 +88,15 @@
     '1_e1',
     '1.4_e1',
     '1.4_e1j',
-    # Underscore right after e:
+    '0x1.1p1_j',
+    # Underscore right after e or p:
     '1e_1',
     '1.4e_1',
     '1.4e_1j',
+    '0x1_p1',
+    '0x1_P1',
+    '0x1.1_p1',
+    '0x1.1_P1',
     # Complex cases with parens:
     '(1+1.5_j_)',
     '(1+1.5_j)',
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py
index 00518abcb11b46..70b5bc6ca83f47 100644
--- a/Lib/test/test_float.py
+++ b/Lib/test/test_float.py
@@ -63,9 +63,9 @@ def test_float(self):
         self.assertEqual(float(3.14), 3.14)
         self.assertEqual(float(314), 314.0)
         self.assertEqual(float("  3.14  "), 3.14)
-        self.assertRaises(ValueError, float, "  0x3.1  ")
-        self.assertRaises(ValueError, float, "  -0x3.p-1  ")
-        self.assertRaises(ValueError, float, "  +0x3.p-1  ")
+        self.assertEqual(float("  0x3.1  "), 3.0625)
+        self.assertEqual(float("  -0x3.p-1  "), -1.5)
+        self.assertEqual(float("  +0x3.p-1  "), 1.5)
         self.assertRaises(ValueError, float, "++3.14")
         self.assertRaises(ValueError, float, "+-3.14")
         self.assertRaises(ValueError, float, "-+3.14")
@@ -95,13 +95,13 @@ def test_noargs(self):
 
     def test_underscores(self):
         for lit in VALID_UNDERSCORE_LITERALS:
-            if not any(ch in lit for ch in 'jJxXoObB'):
+            if not any(ch in lit for ch in 'jJoObB'):
                 self.assertEqual(float(lit), eval(lit))
                 self.assertEqual(float(lit), float(lit.replace('_', '')))
         for lit in INVALID_UNDERSCORE_LITERALS:
             if lit in ('0_7', '09_99'):  # octals are not recognized here
                 continue
-            if not any(ch in lit for ch in 'jJxXoObB'):
+            if not any(ch in lit for ch in 'jJoObB'):
                 self.assertRaises(ValueError, float, lit)
         # Additional test cases; nan and inf are never valid as literals,
         # only in the float() constructor, but we don't allow underscores
@@ -198,9 +198,9 @@ def test_float_with_comma(self):
         self.assertRaises(ValueError, float, "  3,14  ")
         self.assertRaises(ValueError, float, "  +3,14  ")
         self.assertRaises(ValueError, float, "  -3,14  ")
-        self.assertRaises(ValueError, float, "  0x3.1  ")
-        self.assertRaises(ValueError, float, "  -0x3.p-1  ")
-        self.assertRaises(ValueError, float, "  +0x3.p-1  ")
+        self.assertEqual(float("  0x3.1  "), 3.0625)
+        self.assertEqual(float("  -0x3.p-1  "), -1.5)
+        self.assertEqual(float("  +0x3.p-1  "), 1.5)
         self.assertEqual(float("  25.e-1  "), 2.5)
         self.assertAlmostEqual(float("  .25e-1  "), .025)
 
@@ -1559,7 +1559,7 @@ def roundtrip(x):
             except OverflowError:
                 pass
             else:
-                self.identical(x, fromHex(toHex(x)))
+                self.identical(x, roundtrip(x))
 
     def test_subclass(self):
         class F(float):
diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
index 7f5d48b9c63ab7..818aff9db3eaf2 100644
--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -74,6 +74,15 @@ def test_plain_integers(self):
         else:
             self.fail('Weird maxsize value %r' % maxsize)
 
+    def test_attrs_on_hexintegers(self):
+        good_meth = [m for m in dir(int) if not m.startswith('_')]
+        for m in good_meth:
+            self.assertEqual(eval('0x1.' + m), eval('(0x1).' + m))
+        self.check_syntax_error('0x1.spam', "invalid hexadecimal literal",
+                                lineno=1, offset=4)
+        self.check_syntax_error('0x1.foo', "invalid hexadecimal literal",
+                                lineno=1, offset=5)
+
     def test_long_integers(self):
         x = 0
         x = 0xffffffffffffffff
@@ -97,6 +106,23 @@ def test_floats(self):
         x = 3.e14
         x = .3e14
         x = 3.1e4
+        x = 0x1.2p1
+        x = 0x1.2p+1
+        x = 0x1.p1
+        x = 0x1.p-1
+        x = 0x1p0
+        x = 0x1ap1
+        x = 0x1P1
+        x = 0x1cp2
+        x = 0x1.p1
+        x = 0x1.P1
+        x = 0x001.1p2
+        x = 0X1p1
+        x = 0x1.1_1p1
+        x = 0x1.1p1_1
+        x = 0x1.
+        x = 0x1.1
+        x = 0x.1
 
     def test_float_exponent_tokenization(self):
         # See issue 21642.
@@ -134,7 +160,14 @@ def test_bad_numerical_literals(self):
               "use an 0o prefix for octal integers")
         check("1.2_", "invalid decimal literal")
         check("1e2_", "invalid decimal literal")
-        check("1e+", "invalid decimal literal")
+        check("1e+", "invalid float literal")
+        check("0x.p", "invalid float literal")
+        check("0x_.p", "invalid float literal")
+        check("0x1.1p", "invalid float literal")
+        check("0x1.1_p", "invalid float literal")
+        check("0x1.1p_", "invalid float literal")
+        check("0xp", "invalid hexadecimal literal")
+        check("0xP", "invalid hexadecimal literal")
 
     def test_end_of_numerical_literals(self):
         def check(test, error=False):
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 865e0c5b40ddd3..a8e5438424d345 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -273,6 +273,16 @@ def test_float(self):
     NAME       'x'           (1, 0) (1, 1)
     OP         '='           (1, 2) (1, 3)
     NUMBER     '3.14e159'    (1, 4) (1, 12)
+    """)
+        self.check_tokenize("x = 0x1p1", """\
+    NAME       'x'           (1, 0) (1, 1)
+    OP         '='           (1, 2) (1, 3)
+    NUMBER     '0x1p1'       (1, 4) (1, 9)
+    """)
+        self.check_tokenize("x = 0x.1p1", """\
+    NAME       'x'           (1, 0) (1, 1)
+    OP         '='           (1, 2) (1, 3)
+    NUMBER     '0x.1p1'      (1, 4) (1, 10)
     """)
 
     def test_underscore_literals(self):
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 7e71755068e1df..82cae0d3e9da07 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -77,7 +77,10 @@ def maybe(*choices): return group(*choices) + '?'
 Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
                    r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
 Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
-Floatnumber = group(Pointfloat, Expfloat)
+HexExponent = r'[pP][-+]?[0-9](?:_?[0-9])*'
+Hexfloat = group(r'0[xX]_?[0-9a-f](?:_?[0-9a-f])*\.(?:[0-9a-f](?:_?[0-9a-f])*)?',
+                 r'0[xX]_?\.[0-9a-f](?:_?[0-9a-f])*') + HexExponent
+Floatnumber = group(Pointfloat, Expfloat, Hexfloat)
 Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
 Number = group(Imagnumber, Floatnumber, Intnumber)
 
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-01-28-08-17-08.gh-issue-114667.8w_l9I.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-01-28-08-17-08.gh-issue-114667.8w_l9I.rst
new file mode 100644
index 00000000000000..c01c0cde3e1892
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-01-28-08-17-08.gh-issue-114667.8w_l9I.rst
@@ -0,0 +1,3 @@
+Add hexadecimal floating point literals (IEEE 754-2008 §5.12.3) and support
+construction of floats from hexadecimal strings.  Patch by Sergey B
+Kirpichev.
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index 93e1973d6b32fc..fb09d1b098083a 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -1247,11 +1247,64 @@ float_fromhex_impl(PyTypeObject *type, PyObject *string)
 /*[clinic end generated code: output=c54b4923552e5af5 input=0407bebd354bca89]*/
 {
     PyObject *result;
+    Py_ssize_t length;
+    const char *s, *end, *last;
+    double x;
+
+    s = PyUnicode_AsUTF8AndSize(string, &length);
+    if (s == NULL) {
+        return NULL;
+    }
+    last = s + length;
+
+    while (Py_ISSPACE(*s)) {
+        s++;
+    }
+    while (s < last - 1 && Py_ISSPACE(last[-1])) {
+        last--;
+    }
+
+    errno = 0;
+    x = _Py_dg_strtod_hex(s, (char **)&end);
+
+    if (errno == ERANGE) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "hexadecimal value too large to represent as a float");
+        return NULL;
+    }
+
+    if (end != last) {
+        if (end != s && (*end && !Py_ISSPACE(*end))) {
+            PyErr_SetString(PyExc_ValueError,
+                            "hexadecimal string too long to convert");
+            return NULL;
+        }
+        /* Nothing parsed, maybe inf/nan? */
+        x = _Py_parse_inf_or_nan(s, (char **)&end);
+    }
+    if (end != last || end == s) {
+        PyErr_SetString(PyExc_ValueError,
+                        "invalid hexadecimal floating-point string");
+        return NULL;
+    }
+
+    result = PyFloat_FromDouble(x);
+    if (type != &PyFloat_Type && result != NULL) {
+        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
+    }
+    return result;
+}
+
+double
+_Py_dg_strtod_hex(const char *s00, char **se)
+{
     double x;
     long exp, top_exp, lsb, key_digit;
-    const char *s, *coeff_start, *s_store, *coeff_end, *exp_start, *s_end;
+    const char *coeff_start, *s_store, *coeff_end, *exp_start, *s = s00;
     int half_eps, digit, round_up, negate=0;
-    Py_ssize_t length, ndigits, fdigits, i;
+    Py_ssize_t ndigits, fdigits, i;
+
+    *se = (char *)s00;
 
     /*
      * For the sake of simplicity and correctness, we impose an artificial
@@ -1298,11 +1351,6 @@ float_fromhex_impl(PyTypeObject *type, PyObject *string)
      * exp+4*ndigits and exp-4*ndigits are within the range of a long.
      */
 
-    s = PyUnicode_AsUTF8AndSize(string, &length);
-    if (s == NULL)
-        return NULL;
-    s_end = s + length;
-
     /********************
      * Parse the string *
      ********************/
@@ -1311,13 +1359,6 @@ float_fromhex_impl(PyTypeObject *type, PyObject *string)
     while (Py_ISSPACE(*s))
         s++;
 
-    /* infinities and nans */
-    x = _Py_parse_inf_or_nan(s, (char **)&coeff_end);
-    if (coeff_end != s) {
-        s = coeff_end;
-        goto finished;
-    }
-
     /* optional sign */
     if (*s == '-') {
         s++;
@@ -1356,8 +1397,10 @@ float_fromhex_impl(PyTypeObject *type, PyObject *string)
     if (ndigits == 0)
         goto parse_error;
     if (ndigits > Py_MIN(DBL_MIN_EXP - DBL_MANT_DIG - LONG_MIN/2,
-                         LONG_MAX/2 + 1 - DBL_MAX_EXP)/4)
+                         LONG_MAX/2 + 1 - DBL_MAX_EXP)/4) {
+        *se = (char*)coeff_end;
         goto insane_length_error;
+    }
 
     /* [p <exponent>] */
     if (*s == 'p' || *s == 'P') {
@@ -1456,31 +1499,20 @@ float_fromhex_impl(PyTypeObject *type, PyObject *string)
     x = ldexp(x, (int)(exp+4*key_digit));
 
   finished:
-    /* optional trailing whitespace leading to the end of the string */
-    while (Py_ISSPACE(*s))
-        s++;
-    if (s != s_end)
+    if (*s && !Py_ISSPACE(*s))
         goto parse_error;
-    result = PyFloat_FromDouble(negate ? -x : x);
-    if (type != &PyFloat_Type && result != NULL) {
-        Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
-    }
-    return result;
+    *se = (char *)s;
+    errno = 0;
+    return negate ? -x : x;
 
   overflow_error:
-    PyErr_SetString(PyExc_OverflowError,
-                    "hexadecimal value too large to represent as a float");
-    return NULL;
+    errno = ERANGE;
+    return HUGE_VAL;
 
   parse_error:
-    PyErr_SetString(PyExc_ValueError,
-                    "invalid hexadecimal floating-point string");
-    return NULL;
-
   insane_length_error:
-    PyErr_SetString(PyExc_ValueError,
-                    "hexadecimal string too long to convert");
-    return NULL;
+    errno = 0;
+    return 0.0;
 }
 
 /*[clinic input]
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 81363cf8e810fe..a38f264c40aa47 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -410,21 +410,23 @@ verify_identifier(struct tok_state *tok)
 }
 
 static int
-tok_decimal_tail(struct tok_state *tok)
+tok_digits_tail(struct tok_state *tok, int base)
 {
     int c;
+    int (*_isdigit)(char) = base == 16 ? &Py_ISXDIGIT : &Py_ISDIGIT;
 
     while (1) {
         do {
             c = tok_nextc(tok);
-        } while (Py_ISDIGIT(c));
+        } while ((*_isdigit)(c));
         if (c != '_') {
             break;
         }
         c = tok_nextc(tok);
-        if (!Py_ISDIGIT(c)) {
+        if (!(*_isdigit)(c)) {
             tok_backup(tok, c);
-            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
+            _PyTokenizer_syntaxerror(tok, "invalid %s literal",
+                                     base == 16 ? "hexadecimal" : "decimal");
             return 0;
         }
     }
@@ -856,20 +858,61 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
             /* Hex, octal or binary -- maybe. */
             c = tok_nextc(tok);
             if (c == 'x' || c == 'X') {
-                /* Hex */
+                /* Hex integer/float */
                 c = tok_nextc(tok);
-                do {
-                    if (c == '_') {
-                        c = tok_nextc(tok);
-                    }
+                if (c == '_') {
+                    c = tok_nextc(tok);
+                }
+                if (c == '.') {
+                    c = tok_nextc(tok);
                     if (!Py_ISXDIGIT(c)) {
                         tok_backup(tok, c);
-                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
+                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid float literal"));
                     }
-                    do {
-                        c = tok_nextc(tok);
-                    } while (Py_ISXDIGIT(c));
-                } while (c == '_');
+                    goto hexfraction;
+                }
+                else if (!Py_ISXDIGIT(c)) {
+                    tok_backup(tok, c);
+                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
+                }
+                c = tok_digits_tail(tok, 16);
+                if (c == 0) {
+                    return MAKE_TOKEN(ERRORTOKEN);
+                }
+                if (c == '.') {
+                    c = tok_nextc(tok);
+        hexfraction:
+                    /* Allow attribute access on hexadecimal integer literals
+                     * for existing public attributes of ints, e.g. 0x1.bit_length(). */
+                    if ((c == 'a' && lookahead(tok, "s_integer_ratio")) ||
+                        (c == 't' && lookahead(tok, "o_bytes")) ||
+                        (c == 'b' && (lookahead(tok, "it_count") ||
+                                      lookahead(tok, "it_length"))) ||
+                        (c == 'c' && lookahead(tok, "onjugate")) ||
+                        (c == 'd' && lookahead(tok, "enominator")) ||
+                        (c == 'f' && lookahead(tok, "rom_bytes")) ||
+                        (c == 'i' && (lookahead(tok, "mag") ||
+                                      lookahead(tok, "s_integer"))) ||
+                        (c == 'n' && lookahead(tok, "umerator")) ||
+                        (c == 'r' && lookahead(tok, "eal")))
+                    {
+                        tok_backup(tok, c);
+                        c = '.';
+                        goto hexint;
+                    }
+                    if (Py_ISXDIGIT(c)) {
+                        c = tok_digits_tail(tok, 16);
+                        if (c == 0) {
+                            tok->done = E_OK;
+                            _PyTokenizer_syntaxerror(tok, "invalid float literal");
+                            return MAKE_TOKEN(ERRORTOKEN);
+                        }
+                    }
+                }
+                if (c == 'p' || c == 'P') {
+                    goto exponent;
+                }
+         hexint:
                 if (!verify_end_of_number(tok, c, "hexadecimal")) {
                     return MAKE_TOKEN(ERRORTOKEN);
                 }
@@ -950,7 +993,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                 char* zeros_end = tok->cur;
                 if (Py_ISDIGIT(c)) {
                     nonzero = 1;
-                    c = tok_decimal_tail(tok);
+                    c = tok_digits_tail(tok, 10);
                     if (c == 0) {
                         return MAKE_TOKEN(ERRORTOKEN);
                     }
@@ -982,7 +1025,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         }
         else {
             /* Decimal */
-            c = tok_decimal_tail(tok);
+            c = tok_digits_tail(tok, 10);
             if (c == 0) {
                 return MAKE_TOKEN(ERRORTOKEN);
             }
@@ -993,7 +1036,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         fraction:
                     /* Fraction */
                     if (Py_ISDIGIT(c)) {
-                        c = tok_decimal_tail(tok);
+                        c = tok_digits_tail(tok, 10);
                         if (c == 0) {
                             return MAKE_TOKEN(ERRORTOKEN);
                         }
@@ -1009,11 +1052,11 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         c = tok_nextc(tok);
                         if (!Py_ISDIGIT(c)) {
                             tok_backup(tok, c);
-                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
+                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid float literal"));
                         }
                     } else if (!Py_ISDIGIT(c)) {
                         tok_backup(tok, c);
-                        if (!verify_end_of_number(tok, e, "decimal")) {
+                        if (!verify_end_of_number(tok, e, "float")) {
                             return MAKE_TOKEN(ERRORTOKEN);
                         }
                         tok_backup(tok, e);
@@ -1021,7 +1064,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         p_end = tok->cur;
                         return MAKE_TOKEN(NUMBER);
                     }
-                    c = tok_decimal_tail(tok);
+                    c = tok_digits_tail(tok, 10);
                     if (c == 0) {
                         return MAKE_TOKEN(ERRORTOKEN);
                     }
diff --git a/Python/dtoa.c b/Python/dtoa.c
index 3de150351a4ef8..9bf01562982b98 100644
--- a/Python/dtoa.c
+++ b/Python/dtoa.c
@@ -118,6 +118,7 @@
 /* Linking of Python's #defines to Gay's #defines starts here. */
 
 #include "Python.h"
+#include "pycore_floatobject.h"   // _Py_dg_strtod_hex()
 #include "pycore_dtoa.h"          // _PY_SHORT_FLOAT_REPR
 #include "pycore_interp_structs.h"// struct Bigint
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
@@ -1412,6 +1413,9 @@ _Py_dg_strtod(const char *s00, char **se)
         c = *++s;
     }
 
+    if (*s == '0' && (*(s + 1) == 'x' || *(s + 1) == 'X'))
+        return _Py_dg_strtod_hex(s, se) * (sign ? -1: 1);
+
     /* Skip leading zeros: lz is true iff there were leading zeros. */
     s1 = s;
     while (c == '0')
diff --git a/Python/pystrtod.c b/Python/pystrtod.c
index 7b74f613ed563b..57206bb7fe73d6 100644
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -169,13 +169,8 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
         p++;
     }
 
-    /* Some platform strtods accept hex floats; Python shouldn't (at the
-       moment), so we check explicitly for strings starting with '0x'. */
-    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
-        goto invalid_string;
-
     /* Check that what's left begins with a digit or decimal point */
-    if (!Py_ISDIGIT(*p) && *p != '.')
+    if (!Py_ISXDIGIT(*p) && *p != '.')
         goto invalid_string;
 
     digits_pos = p;
@@ -186,7 +181,7 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
            swapped for the current locale's decimal point before we
            call strtod.  On the other hand, if we find the current
            locale's decimal point then the input is invalid. */
-        while (Py_ISDIGIT(*p))
+        while (Py_ISXDIGIT(*p))
             p++;
 
         if (*p == '.')
@@ -194,10 +189,10 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr)
             decimal_point_pos = p++;
 
             /* locate end of number */
-            while (Py_ISDIGIT(*p))
+            while (Py_ISXDIGIT(*p))
                 p++;
 
-            if (*p == 'e' || *p == 'E')
+            if (*p == 'e' || *p == 'E' || *p == 'p' || *p == 'P')
                 p++;
             if (*p == '+' || *p == '-')
                 p++;
@@ -350,6 +345,7 @@ _Py_string_to_number_with_underscores(
     const char *p, *last;
     char *dup, *end;
     PyObject *result;
+    int (*_isdigit)(char) = &Py_ISDIGIT;
 
     assert(s[orig_len] == '\0');
 
@@ -364,21 +360,40 @@ _Py_string_to_number_with_underscores(
     end = dup;
     prev = '\0';
     last = s + orig_len;
-    for (p = s; *p; p++) {
+    p = s;
+    /* Has hexadecimal prefix? */
+    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X')) {
+        _isdigit = &Py_ISXDIGIT;
+        /* Accept prefix. */
+        *end++ = *p;
+        p++;
+        *end++ = *p;
+        p++;
+        /* Underscore allowed right after the prefix and before '.' */
+        if (*p == '_') {
+            p++;
+            if (*p == '.') {
+                *end++ = *p;
+                p++;
+            }
+        }
+    }
+    while (*p) {
         if (*p == '_') {
             /* Underscores are only allowed after digits. */
-            if (!(prev >= '0' && prev <= '9')) {
+            if (!(*_isdigit)(prev)) {
                 goto error;
             }
         }
         else {
             *end++ = *p;
             /* Underscores are only allowed before digits. */
-            if (prev == '_' && !(*p >= '0' && *p <= '9')) {
+            if (prev == '_' && !(*_isdigit)(*p)) {
                 goto error;
             }
         }
         prev = *p;
+        p++;
     }
     /* Underscores are not allowed at the end. */
     if (prev == '_') {

From cd9bf163c829f23aaf6d45366b840101cd57a9dc Mon Sep 17 00:00:00 2001
From: Sergey B Kirpichev <skirpichev@gmail.com>
Date: Tue, 2 Apr 2024 16:06:01 +0300
Subject: [PATCH 2/2] As it requires a PEP, let's trigger a syntax warning

---
 Lib/test/test_grammar.py | 4 +++-
 Lib/test/test_zstd.py    | 2 +-
 Parser/lexer/lexer.c     | 5 +++++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
index 818aff9db3eaf2..8afdcdb9cab34a 100644
--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -77,7 +77,9 @@ def test_plain_integers(self):
     def test_attrs_on_hexintegers(self):
         good_meth = [m for m in dir(int) if not m.startswith('_')]
         for m in good_meth:
-            self.assertEqual(eval('0x1.' + m), eval('(0x1).' + m))
+            with self.assertWarns(SyntaxWarning):
+                v = eval('0x1.' + m)
+            self.assertEqual(v, eval('(0x1).' + m))
         self.check_syntax_error('0x1.spam', "invalid hexadecimal literal",
                                 lineno=1, offset=4)
         self.check_syntax_error('0x1.foo', "invalid hexadecimal literal",
diff --git a/Lib/test/test_zstd.py b/Lib/test/test_zstd.py
index 6358cc78739cd9..d692a3ff0c53d7 100644
--- a/Lib/test/test_zstd.py
+++ b/Lib/test/test_zstd.py
@@ -1239,7 +1239,7 @@ def test_is_raw(self):
             ZstdDict(desk333=345)
 
     def test_invalid_dict(self):
-        DICT_MAGIC = 0xEC30A437.to_bytes(4, byteorder='little')
+        DICT_MAGIC = (0xEC30A437).to_bytes(4, byteorder='little')
         dict_content = DICT_MAGIC + b'abcdefghighlmnopqrstuvwxyz'
 
         # corrupted
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index a38f264c40aa47..e1c5e3fca4e2c7 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -896,6 +896,11 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
                         (c == 'n' && lookahead(tok, "umerator")) ||
                         (c == 'r' && lookahead(tok, "eal")))
                     {
+                        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
+                                                     "invalid float literal"))
+                        {
+                            return 0;
+                        }
                         tok_backup(tok, c);
                         c = '.';
                         goto hexint;