diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst index 4aaf3905e81c8a..0473c77ea9807e 100644 --- a/Doc/c-api/conversion.rst +++ b/Doc/c-api/conversion.rst @@ -124,7 +124,7 @@ The following functions provide locale-independent string to number conversions. *format_code*, *precision*, and *flags*. *format_code* must be one of ``'e'``, ``'E'``, ``'f'``, ``'F'``, - ``'g'``, ``'G'`` or ``'r'``. For ``'r'``, the supplied *precision* + ``'g'``, ``'G'``, ``'x'``, ``'X'`` or ``'r'``. For ``'r'``, the supplied *precision* must be 0 and is ignored. The ``'r'`` format code specifies the standard :func:`repr` format. @@ -151,6 +151,9 @@ The following functions provide locale-independent string to number conversions. .. versionadded:: 3.1 + .. versionchanged:: 3.14 + Support ``'x'`` and ``'X'`` format types for :class:`float`. + .. c:function:: int PyOS_stricmp(const char *s1, const char *s2) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index c3c0d732cf18d4..75217af978fea8 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -588,6 +588,30 @@ The available presentation types for :class:`float` and | | as altered by the other format modifiers. | +---------+----------------------------------------------------------+ +Additionally, the following presentation types are available for :class:`float`: + + +---------+----------------------------------------------------------+ + | Type | Meaning | + +=========+==========================================================+ + | ``'x'`` | Represent the number by a hexadecimal string in the | + | | form ``[±][0x]h[.hhh]p±d``, where there is one | + | | hexadecimal digit before the dot and the fractional part | + | | either is exact or the number of its hexadecimal digits | + | | is equal to the specified precision. The exponent ``d`` | + | | is written in decimal, it always contains at least one | + | | digit, and it gives the power of 2 by which to multiply | + | | the coefficient. 
| + | | | + | | If the ``'#'`` option is specified, the prefix ``'0x'`` | + | | will be inserted before an integer part. | + +---------+----------------------------------------------------------+ + | ``'X'`` | Same as ``'x'``, but uses uppercase digits, the ``0X`` | + | | prefix and ``'P'`` as the exponent separator. | + +---------+----------------------------------------------------------+ + +.. versionchanged:: 3.14 + Support ``'x'`` and ``'X'`` format types for :class:`float`. + .. _formatexamples: diff --git a/Include/codecs.h b/Include/codecs.h index 512a3c723eca18..6f98b1085c7270 100644 --- a/Include/codecs.h +++ b/Include/codecs.h @@ -168,6 +168,7 @@ PyAPI_FUNC(PyObject *) PyCodec_NameReplaceErrors(PyObject *exc); #ifndef Py_LIMITED_API PyAPI_DATA(const char *) Py_hexdigits; +PyAPI_DATA(const char *) Py_hexdigits_upper; #endif #ifdef __cplusplus diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h index f984df695696c3..45de0f2388a134 100644 --- a/Include/internal/pycore_floatobject.h +++ b/Include/internal/pycore_floatobject.h @@ -55,6 +55,8 @@ extern PyObject* _Py_string_to_number_with_underscores( extern double _Py_parse_inf_or_nan(const char *p, char **endptr); +extern char * _Py_float_to_hex(double x, int precision, int always_add_sign, + int use_alt_formatting, int upper, int float_hex); #ifdef __cplusplus } diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 756cf9bd7719c0..8b56d76bb0d979 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -700,12 +700,42 @@ def test_format(self): # % formatting self.assertEqual(format(-1.0, '%'), '-100.000000%') + # hexadecimal format + x = float.fromhex('0x0.0030p+0') + self.assertEqual(format(x, 'x'), '1.8p-11') + self.assertEqual(format(x, 'X'), '1.8P-11') + self.assertEqual(format(x, '.0x'), '1p-10') + x = float.fromhex('0x1.7p+0') + self.assertEqual(format(x, '.0x'), '1p+0') + x = float.fromhex('0x0.1p-1022') # subnormal + 
self.assertEqual(format(x, 'x'), '0.1p-1022') + x = float.fromhex('0x0.0040p+0') + self.assertEqual(format(x, 'x'), '1p-10') + self.assertEqual(format(x, '>10x'), ' 1p-10') + self.assertEqual(format(x, '>#10x'), ' 0x1p-10') + self.assertEqual(format(x, '>010x'), '000001p-10') + self.assertEqual(format(x, '>#010x'), '0000x1p-10') + self.assertEqual(format(x, '#010x'), '0x0001p-10') + self.assertEqual(format(x, '<10x'), '1p-10 ') + self.assertEqual(format(x, '<#10x'), '0x1p-10 ') + x = float.fromhex('0x1.fe12p0') + self.assertEqual(format(x, 'x'), '1.fe12p+0') + self.assertEqual(format(x, '#X'), '0X1.FE12P+0') + self.assertEqual(format(x, '.3x'), '1.fe1p+0') + self.assertEqual(format(x, '.1x'), '1.0p+1') + self.assertEqual(format(x, '#.1x'), '0x1.0p+1') + x = float.fromhex('1.08p+0') + self.assertEqual(format(x, 'x'), '1.08p+0') + self.assertEqual(format(x, '.1x'), '1.0p+0') + x = float.fromhex('1.98p+0') + self.assertEqual(format(x, '.1x'), '1.ap+0') + # conversion to string should fail self.assertRaises(ValueError, format, 3.0, "s") - # confirm format options expected to fail on floats, such as integer - # presentation types - for format_spec in 'sbcdoxX': + # confirm format options expected to fail on floats, such as some + # integer presentation types + for format_spec in 'sbcdo': self.assertRaises(ValueError, format, 0.0, format_spec) self.assertRaises(ValueError, format, 1.0, format_spec) self.assertRaises(ValueError, format, -1.0, format_spec) @@ -1479,7 +1509,7 @@ def roundtrip(x): self.identical(x, roundtrip(x)) self.identical(-x, roundtrip(-x)) - # fromHex(toHex(x)) should exactly recover x, for any non-NaN float x. + # roundtrip(x) should exactly recover x, for any non-NaN float x. 
import random for i in range(10000): e = random.randrange(-1200, 1200) @@ -1490,7 +1520,7 @@ def roundtrip(x): except OverflowError: pass else: - self.identical(x, fromHex(toHex(x))) + self.identical(x, roundtrip(x)) def test_subclass(self): class F(float): diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index 7bdd2881904548..7b8b8664428ffd 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1322,9 +1322,7 @@ def __repr__(self): # test number formatter errors: self.assertRaises(ValueError, '{0:x}'.format, 1j) - self.assertRaises(ValueError, '{0:x}'.format, 1.0) self.assertRaises(ValueError, '{0:X}'.format, 1j) - self.assertRaises(ValueError, '{0:X}'.format, 1.0) self.assertRaises(ValueError, '{0:o}'.format, 1j) self.assertRaises(ValueError, '{0:o}'.format, 1.0) self.assertRaises(ValueError, '{0:u}'.format, 1j) diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index fbca198aab5180..eef15e9f4d7708 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -525,9 +525,9 @@ def test(f, format_spec, result): self.assertRaises(TypeError, 3.0.__format__, None) self.assertRaises(TypeError, 3.0.__format__, 0) - # confirm format options expected to fail on floats, such as integer - # presentation types - for format_spec in 'sbcdoxX': + # confirm format options expected to fail on floats, such as some + # integer presentation types + for format_spec in 'sbcdo': self.assertRaises(ValueError, format, 0.0, format_spec) self.assertRaises(ValueError, format, 1.0, format_spec) self.assertRaises(ValueError, format, -1.0, format_spec) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-29-05-06-03.gh-issue-113805.5oGkdZ.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-29-05-06-03.gh-issue-113805.5oGkdZ.rst new file mode 100644 index 00000000000000..0e0dc2831529b1 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-29-05-06-03.gh-issue-113805.5oGkdZ.rst @@ -0,0 +1,2 @@ +Support formatting floats (using "x" and "X" format types) 
in hexadecimal +notation, like ``0x1.2efp-2``. Patch by Sergey B Kirpichev. diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 2627ba80eed8ca..f76921855a7a35 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -1133,14 +1133,7 @@ float_conjugate_impl(PyObject *self) return float_float(self); } -/* turn ASCII hex characters into integer values and vice versa */ - -static char -char_from_hex(int x) -{ - assert(0 <= x && x < 16); - return Py_hexdigits[x]; -} +/* turn ASCII hex characters into integer values */ static int hex_from_char(char c) { @@ -1207,11 +1200,141 @@ hex_from_char(char c) { return x; } -/* convert a float to a hexadecimal string */ +/* Convert a float to a hexadecimal string [±][0x]h[.hhhhhhhh]p±d, + where the fractional part either is exact (precision < 0) or the + number of digits after the dot is equal to the precision. + + The return value is a pointer to buffer with the converted string or NULL if + the conversion failed. The caller is responsible for freeing the returned + string by calling PyMem_Free(). + + The exponent d is written in decimal, it always contains at least one digit, + and it gives the power of 2 by which to multiply the coefficient. + + x - the double to be converted + precision - the desired precision + always_add_sign - nonzero if a '+' sign should be included for x > 0 + use_alt_formatting - nonzero if the hexadecimal prefix should be added. + upper - nonzero, if uppercase letters should be used for hexadecimal + numbers, prefix and the exponent separator. + float_hex - use float.hex() format. 
+ */ + +char * +_Py_float_to_hex(double x, int precision, int always_add_sign, + int use_alt_formatting, int upper, int float_hex) +{ + /* DBL_MANT_DIG rounded up to the next integer of the form 4k+1 */ + const int tohex_nbits = DBL_MANT_DIG + 3 - (DBL_MANT_DIG+2)%4; + const int full_prec = (int) (tohex_nbits - 1)/4; + int e; + double m = frexp(fabs(x), &e); + + int autoprec = precision < 0; + if (autoprec) { + precision = full_prec; + if (!x && float_hex) { + /* for compatibility with float.hex(), we keep just one + digit of zero */ + precision = 1; + } + } + + /* normalization */ + if (m) { + int shift = 1 - Py_MAX(DBL_MIN_EXP - e, 0); + m = ldexp(m, shift); + e -= shift; + } + + /* round to precision digits (ties go to the even digit) */ + if (!autoprec) { + int round_prec_x4 = 4*Py_MIN(precision, full_prec); + do { + double frac = ldexp(m, round_prec_x4); + double ipart = floor(frac); /* kept as a double: frac may exceed LONG_MAX where long is 32 bits wide */ + frac -= ipart; + frac *= 16.0; + if (frac >= 8.0) { + if (frac != 8.0 || fmod(ipart, 2.0) != 0.0) { + m += ldexp(1.0, -round_prec_x4); + } + } + if ((int)(m) & 0x2) { + m /= 2.0; + e += 1; + } + else { + break; + } + } while (round_prec_x4); + } + + /* Conservative estimation for number of digits in the exponent. + IEEE quadruple precision floats should fit. */ + const size_t exp_len = 5; -/* TOHEX_NBITS is DBL_MANT_DIG rounded up to the next integer - of the form 4k+1. */ -#define TOHEX_NBITS DBL_MANT_DIG + 3 - (DBL_MANT_DIG+2)%4 + /* Allocate space for [±][0x] h[.] [hhhhhhhh] p± d '\0' */ + size_t size = 1 + 2 + (size_t)precision + 2 + exp_len + 1; /* widen precision first so the sum cannot overflow int */ + if (use_alt_formatting) { + size += 2; + } + char *s = PyMem_Malloc(size); + if (!s) { + return NULL; + } + + /* sign and prefix */ + size_t si = 0; + if (copysign(1.0, x) == -1.0) { + s[si] = '-'; + si++; + } + else if (always_add_sign) { + s[si] = '+'; + si++; + } + if (use_alt_formatting) { + s[si] = '0'; + si++; + s[si] = upper ? 'X' : 'x'; + si++; + } + + /* mantissa */ + const char *hexmap = upper ? 
Py_hexdigits_upper : Py_hexdigits; + assert(0 <= (int)m && (int)m < 16); + s[si] = hexmap[(int)m]; + si++; + m -= (int)m; + s[si] = '.'; + for (int i = 0; i < precision; i++) { + si++; + m *= 16.0; + assert(0 <= (int)m && (int)m < 16); + s[si] = hexmap[(int)m]; + m -= (int)m; + } + + /* clear trailing zeros from mantissa */ + if (autoprec && !float_hex) { + while (s[si] == '0') { + si--; + } + } + + /* clear trailing dot */ + if (s[si] != '.') { + si++; + } + + /* exponent */ + s[si] = upper ? 'P' : 'p'; + si++; + si += snprintf(s + si, exp_len + 2, "%+d", e) + 1; /* bound covers the sign, up to exp_len digits and the NUL */ + + return s; +} /*[clinic input] float.hex @@ -1228,54 +1351,24 @@ static PyObject * float_hex_impl(PyObject *self) /*[clinic end generated code: output=0ebc9836e4d302d4 input=bec1271a33d47e67]*/ { - double x, m; - int e, shift, i, si, esign; - /* Space for 1+(TOHEX_NBITS-1)/4 digits, a decimal point, and the - trailing NUL byte. */ - char s[(TOHEX_NBITS-1)/4+3]; - - CONVERT_TO_DOUBLE(self, x); + PyObject *result = NULL; + double x = PyFloat_AS_DOUBLE(self); - if (isnan(x) || isinf(x)) + if (isnan(x) || isinf(x)) { return float_repr((PyFloatObject *)self); - - if (x == 0.0) { - if (copysign(1.0, x) == -1.0) - return PyUnicode_FromString("-0x0.0p+0"); - else - return PyUnicode_FromString("0x0.0p+0"); } - m = frexp(fabs(x), &e); - shift = 1 - Py_MAX(DBL_MIN_EXP - e, 0); - m = ldexp(m, shift); - e -= shift; + char *buf = _Py_float_to_hex(x, -1, 0, 1, 0, 1); - si = 0; - s[si] = char_from_hex((int)m); - si++; - m -= (int)m; - s[si] = '.'; - si++; - for (i=0; i < (TOHEX_NBITS-1)/4; i++) { - m *= 16.0; - s[si] = char_from_hex((int)m); - si++; - m -= (int)m; + if (buf) { + result = PyUnicode_FromString(buf); + PyMem_Free(buf); } - s[si] = '\0'; - - if (e < 0) { - esign = (int)'-'; - e = -e; + else { + PyErr_NoMemory(); } - else - esign = (int)'+'; - if (x < 0.0) - return PyUnicode_FromFormat("-0x%sp%c%d", s, esign, e); - else - return PyUnicode_FromFormat("0x%sp%c%d", s, esign, e); + return result; } /* Convert 
a hexadecimal string to a float. */ diff --git a/Python/codecs.c b/Python/codecs.c index bed245366f9234..5244bca5f915cb 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -17,6 +17,7 @@ Copyright (c) Corporation for National Research Initiatives. #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI const char *Py_hexdigits = "0123456789abcdef"; +const char *Py_hexdigits_upper = "0123456789ABCDEF"; /* --- Codec Registry ----------------------------------------------------- */ diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 6af589f966a502..53754cd19fb1e8 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -1047,6 +1047,7 @@ format_float_internal(PyObject *value, Py_ssize_t n_digits; Py_ssize_t n_remainder; Py_ssize_t n_total; + Py_ssize_t n_prefix = 0; int has_decimal; double val; int precision, default_precision = 6; @@ -1059,7 +1060,7 @@ format_float_internal(PyObject *value, Py_UCS4 maxchar = 127; Py_UCS4 sign_char = '\0'; int float_type; /* Used to see if we have a nan, inf, or regular float. 
*/ - PyObject *unicode_tmp = NULL; + PyObject *unicode_tmp = NULL, *prefix = NULL; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -1100,7 +1101,7 @@ format_float_internal(PyObject *value, add_pct = 1; } - if (precision < 0) + if (precision < 0 && type != 'x' && type != 'X') precision = default_precision; else if (type == 'r') type = 'g'; @@ -1148,6 +1149,14 @@ format_float_internal(PyObject *value, ++index; --n_digits; } + if (PyUnicode_READ_CHAR(unicode_tmp, index) == '0' + && (PyUnicode_READ_CHAR(unicode_tmp, index + 1) == 'x' + || PyUnicode_READ_CHAR(unicode_tmp, index + 1) == 'X')) { + n_prefix = 2; + index += 2; + n_digits -= 2; + prefix = unicode_tmp; + } /* Determine if we have any "remainder" (after the digits, might include decimal or exponent or both (or neither)) */ @@ -1160,7 +1169,7 @@ format_float_internal(PyObject *value, goto done; /* Calculate how much memory we'll need. */ - n_total = calc_number_widths(&spec, 0, sign_char, index, + n_total = calc_number_widths(&spec, n_prefix, sign_char, index, index + n_digits, n_remainder, has_decimal, &locale, format, &maxchar); if (n_total == -1) { @@ -1174,7 +1183,7 @@ format_float_internal(PyObject *value, /* Populate the memory. */ result = fill_number(writer, &spec, unicode_tmp, index, - NULL, 0, format->fill_char, + prefix, n_prefix ? index - 2 : 0, format->fill_char, &locale, 0); done: @@ -1574,6 +1583,8 @@ _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer, case 'G': case 'n': case '%': + case 'x': + case 'X': /* no conversion, already a float. 
do the formatting */ return format_float_internal(obj, &format, writer); diff --git a/Python/pystrtod.c b/Python/pystrtod.c index 5c8be0447ace4b..f09d7bd9b95aed 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -2,6 +2,7 @@ #include #include "pycore_dtoa.h" // _Py_dg_strtod() +#include "pycore_floatobject.h" // _Py_float_to_hex() #include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR #include // localeconv() @@ -771,6 +772,7 @@ char * PyOS_double_to_string(double val, case 'e': /* exponent */ case 'f': /* fixed */ case 'g': /* general */ + case 'x': /* double in hexadecimal */ break; case 'E': upper = 1; @@ -784,6 +786,10 @@ char * PyOS_double_to_string(double val, upper = 1; format_code = 'g'; break; + case 'X': + upper = 1; + format_code = 'x'; + break; case 'r': /* repr format */ /* Supplied precision is unused, must be 0. */ if (precision != 0) { @@ -874,6 +880,14 @@ char * PyOS_double_to_string(double val, if (flags & Py_DTSF_ADD_DOT_0) format_code = 'Z'; + /* Use own helper for hexadecimal notation, because the 'a' format + type of the stdlib behaves differently wrt the '#' option. */ + if (format_code == 'x') { + PyMem_Free(buf); + return _Py_float_to_hex(val, precision, flags & Py_DTSF_SIGN, + flags & Py_DTSF_ALT, upper, 0); + } + PyOS_snprintf(format, sizeof(format), "%%%s.%i%c", (flags & Py_DTSF_ALT ? "#" : ""), precision, format_code); @@ -978,6 +992,13 @@ format_float_short(double d, char format_code, char *digits, *digits_end; int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0; Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end; + + if (format_code == 'x' && Py_IS_FINITE(d)) { + return _Py_float_to_hex(d, precision, always_add_sign, + use_alt_formatting, + float_strings == uc_float_strings, 0); + } + _Py_SET_53BIT_PRECISION_HEADER; /* _Py_dg_dtoa returns a digit string (no decimal point or exponent). @@ -1230,6 +1251,14 @@ char * PyOS_double_to_string(double val, /* Validate format_code, and map upper and lower case. 
Compute the mode and make any adjustments as needed. */ switch (format_code) { + /* hexadecimal floats */ + case 'X': + float_strings = uc_float_strings; + /* Fall through. */ + case 'x': + format_code = 'x'; + mode = 2; + break; /* exponent */ case 'E': float_strings = uc_float_strings; diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 466f25daa14dc6..59716f36b6caa4 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -344,6 +344,7 @@ Python/ast_opt.c fold_unaryop ops - Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - +Python/codecs.c - Py_hexdigits_upper - Python/codecs.c - ucnhash_capi - Python/codecs.c _PyCodec_InitRegistry methods - Python/compile.c - NO_LABEL -