diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 803572afc39ae3..3429a4eb652709 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -113,18 +113,14 @@ There are three ways strings and buffers can be converted to C: ``z`` (:class:`str` or ``None``) [const char \*] Like ``s``, but the Python object may also be ``None``, in which case the C pointer is set to ``NULL``. - It is the same as ``s?`` with the C pointer was initialized to ``NULL``. ``z*`` (:class:`str`, :term:`bytes-like object` or ``None``) [Py_buffer] Like ``s*``, but the Python object may also be ``None``, in which case the ``buf`` member of the :c:type:`Py_buffer` structure is set to ``NULL``. - It is the same as ``s*?`` with the ``buf`` member of the :c:type:`Py_buffer` - structure was initialized to ``NULL``. ``z#`` (:class:`str`, read-only :term:`bytes-like object` or ``None``) [const char \*, :c:type:`Py_ssize_t`] Like ``s#``, but the Python object may also be ``None``, in which case the C pointer is set to ``NULL``. - It is the same as ``s#?`` with the C pointer was initialized to ``NULL``. ``y`` (read-only :term:`bytes-like object`) [const char \*] This format converts a bytes-like object to a C pointer to a @@ -394,17 +390,6 @@ Other objects Non-tuple sequences are deprecated if *items* contains format units which store a borrowed buffer or a borrowed reference. -``unit?`` (anything or ``None``) [*matching-variable(s)*] - ``?`` modifies the behavior of the preceding format unit. - The C variable(s) corresponding to that parameter should be initialized - to their default value --- when the argument is ``None``, - :c:func:`PyArg_ParseTuple` does not touch the contents of the corresponding - C variable(s). - If the argument is not ``None``, it is parsed according to the specified - format unit. - - .. versionadded:: 3.14 - A few other characters have a meaning in a format string. These may not occur inside nested parentheses. 
They are: diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 199a917f9f101e..b7bd547d38fd1e 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -462,7 +462,7 @@ Glossary core and with user code. f-string - String literals prefixed with ``'f'`` or ``'F'`` are commonly called + String literals prefixed with ``f`` or ``F`` are commonly called "f-strings" which is short for :ref:`formatted string literals `. See also :pep:`498`. @@ -1322,6 +1322,11 @@ Glossary See also :term:`borrowed reference`. + t-string + String literals prefixed with ``t`` or ``T`` are commonly called + "t-strings" which is short for + :ref:`template string literals `. + text encoding A string in Python is a sequence of Unicode code points (in range ``U+0000``--``U+10FFFF``). To store or transfer a string, it needs to be diff --git a/Doc/library/annotationlib.rst b/Doc/library/annotationlib.rst index 7dfc11449a6cbc..981d89be7d58d6 100644 --- a/Doc/library/annotationlib.rst +++ b/Doc/library/annotationlib.rst @@ -511,7 +511,7 @@ code execution even with no access to any globals or builtins. For example: >>> def f(x: (1).__class__.__base__.__subclasses__()[-1].__init__.__builtins__["print"]("Hello world")): pass ... - >>> annotationlib.get_annotations(f, format=annotationlib.Format.SOURCE) + >>> annotationlib.get_annotations(f, format=annotationlib.Format.STRING) Hello world {'x': 'None'} diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index ef6c62dca1e124..b24459b5c6346f 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -289,9 +289,9 @@ Literals * ``conversion`` is an integer: * -1: no formatting - * 115: ``!s`` string formatting - * 114: ``!r`` repr formatting - * 97: ``!a`` ascii formatting + * 115 (``ord('s')``): ``!s`` string formatting + * 114 (``ord('r')``): ``!r`` repr formatting + * 97 (``ord('a')``): ``!a`` ASCII formatting * ``format_spec`` is a :class:`JoinedStr` node representing the formatting of the value, or ``None`` if no format was specified. 
Both @@ -325,6 +325,54 @@ Literals Constant(value='.3')]))])) +.. class:: TemplateStr(values) + + A t-string, comprising a series of :class:`Interpolation` and :class:`Constant` + nodes. + + .. doctest:: + + >>> print(ast.dump(ast.parse('t"{name} finished {place:ordinal}"', mode='eval'), indent=4)) + Expression( + body=TemplateStr( + values=[ + Interpolation( + value=Name(id='name'), + str='name', + conversion=-1), + Constant(value=' finished '), + Interpolation( + value=Name(id='place'), + str='place', + conversion=-1, + format_spec=JoinedStr( + values=[ + Constant(value='ordinal')]))])) + + .. versionadded:: 3.14 + + +.. class:: Interpolation(value, str, conversion, format_spec) + + Node representing a single interpolation field in a t-string. + + * ``value`` is any expression node (such as a literal, a variable, or a + function call). + * ``str`` is a constant containing the text of the interpolation expression. + * ``conversion`` is an integer: + + * -1: no conversion + * 115 (``ord('s')``): ``!s`` string conversion + * 114 (``ord('r')``): ``!r`` repr conversion + * 97 (``ord('a')``): ``!a`` ASCII conversion + + * ``format_spec`` is a :class:`JoinedStr` node representing the formatting + of the value, or ``None`` if no format was specified. Both + ``conversion`` and ``format_spec`` can be set at the same time. + + .. versionadded:: 3.14 + + .. class:: List(elts, ctx) Tuple(elts, ctx) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 11685a32f48e4f..ac8a911c40a860 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -1120,6 +1120,48 @@ iterations of the loop. .. versionadded:: 3.12 +.. opcode:: BUILD_TEMPLATE + + Constructs a new :class:`~string.templatelib.Template` from a tuple + of strings and a tuple of interpolations and pushes the resulting instance + onto the stack:: + + interpolations = STACK.pop() + strings = STACK.pop() + STACK.append(_build_template(strings, interpolations)) + + .. versionadded:: 3.14 + + +.. 
opcode:: BUILD_INTERPOLATION (format) + + Constructs a new :class:`~string.templatelib.Interpolation` from a + value and its source expression and pushes the resulting instance onto the + stack. + + If no conversion or format specification is present, ``format`` is set to + ``2``. + + If the low bit of ``format`` is set, it indicates that the interpolation + contains a format specification. + + If ``format >> 2`` is non-zero, it indicates that the interpolation + contains a conversion. The value of ``format >> 2`` is the conversion type + (``0`` for no conversion, ``1`` for ``!s``, ``2`` for ``!r``, and + ``3`` for ``!a``):: + + conversion = format >> 2 + if format & 1: + format_spec = STACK.pop() + else: + format_spec = None + expression = STACK.pop() + value = STACK.pop() + STACK.append(_build_interpolation(value, expression, conversion, format_spec)) + + .. versionadded:: 3.14 + + .. opcode:: BUILD_TUPLE (count) Creates a tuple consuming *count* items from the stack, and pushes the diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index d59f91f2aa8000..d27023cde4cfe9 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -94,7 +94,7 @@ the :mod:`glob` module.) Any iterable can now be passed, rather than just sequences. -.. function:: commonprefix(list) +.. function:: commonprefix(list, /) Return the longest path prefix (taken character-by-character) that is a prefix of all paths in *list*. If *list* is empty, return the empty string @@ -199,14 +199,14 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: getatime(path) +.. function:: getatime(path, /) Return the time of last access of *path*. The return value is a floating-point number giving the number of seconds since the epoch (see the :mod:`time` module). Raise :exc:`OSError` if the file does not exist or is inaccessible. -.. function:: getmtime(path) +.. function:: getmtime(path, /) Return the time of last modification of *path*. 
The return value is a floating-point number giving the number of seconds since the epoch (see the :mod:`time` module). @@ -216,7 +216,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: getctime(path) +.. function:: getctime(path, /) Return the system's ctime which, on some systems (like Unix) is the time of the last metadata change, and, on others (like Windows), is the creation time for *path*. @@ -228,7 +228,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: getsize(path) +.. function:: getsize(path, /) Return the size, in bytes, of *path*. Raise :exc:`OSError` if the file does not exist or is inaccessible. @@ -351,7 +351,7 @@ the :mod:`glob` module.) .. versionadded:: 3.13 -.. function:: join(path, *paths) +.. function:: join(path, /, *paths) Join one or more path segments intelligently. The return value is the concatenation of *path* and all members of *\*paths*, with exactly one @@ -402,7 +402,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: realpath(path, *, strict=False) +.. function:: realpath(path, /, *, strict=False) Return the canonical path of the specified filename, eliminating any symbolic links encountered in the path (if they are supported by the operating @@ -471,7 +471,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: samefile(path1, path2) +.. function:: samefile(path1, path2, /) Return ``True`` if both pathname arguments refer to the same file or directory. This is determined by the device number and i-node number and raises an @@ -498,7 +498,7 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. -.. function:: samestat(stat1, stat2) +.. function:: samestat(stat1, stat2, /) Return ``True`` if the stat tuples *stat1* and *stat2* refer to the same file. 
These structures may have been returned by :func:`os.fstat`, diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 8e688f03eb3a87..90683c0b00d78a 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2675,9 +2675,9 @@ For example: lead to a number of common errors (such as failing to display tuples and dictionaries correctly). Using the newer :ref:`formatted string literals `, the :meth:`str.format` interface, or :ref:`template strings - ` may help avoid these errors. Each of these - alternatives provides their own trade-offs and benefits of simplicity, - flexibility, and/or extensibility. + ($-strings) ` may help avoid these errors. + Each of these alternatives provides their own trade-offs and benefits of + simplicity, flexibility, and/or extensibility. String objects have one unique built-in operation: the ``%`` operator (modulo). This is also known as the string *formatting* or *interpolation* operator. diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 23e15780075435..83e8ee2722ed8a 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -198,8 +198,9 @@ Format String Syntax The :meth:`str.format` method and the :class:`Formatter` class share the same syntax for format strings (although in the case of :class:`Formatter`, subclasses can define their own format string syntax). The syntax is -related to that of :ref:`formatted string literals `, but it is -less sophisticated and, in particular, does not support arbitrary expressions. +related to that of :ref:`formatted string literals ` and +:ref:`template string literals `, but it is less sophisticated +and, in particular, does not support arbitrary expressions. .. index:: single: {} (curly brackets); in string formatting @@ -264,6 +265,8 @@ Some simple format string examples:: "Weight in tons {0.weight}" # 'weight' attribute of first positional arg "Units destroyed: {players[0]}" # First element of keyword argument 'players'. +.. 
_formatstrings-conversion: + The *conversion* field causes a type coercion before formatting. Normally, the job of formatting a value is done by the :meth:`~object.__format__` method of the value itself. However, in some cases it is desirable to force a type to be formatted @@ -306,7 +309,7 @@ Format Specification Mini-Language "Format specifications" are used within replacement fields contained within a format string to define how individual values are presented (see -:ref:`formatstrings` and :ref:`f-strings`). +:ref:`formatstrings`, :ref:`f-strings`, and :ref:`t-strings`). They can also be passed directly to the built-in :func:`format` function. Each formattable type may define how the format specification is to be interpreted. @@ -789,10 +792,20 @@ Nesting arguments and more complex examples:: -.. _template-strings: +.. _template-strings-pep292: -Template strings ----------------- +Template strings ($-strings) +---------------------------- + +.. note:: + + The feature described here was introduced in Python 2.4. It is unrelated + to, and should not be confused with, the newer + :ref:`template strings ` and + :ref:`t-string literal syntax ` introduced in Python 3.14. + T-string literals evaluate to instances of a different + :class:`~string.templatelib.Template` class, found in the + :mod:`string.templatelib` module. Template strings provide simpler string substitutions as described in :pep:`292`. A primary use case for template strings is for diff --git a/Doc/library/string.templatelib.rst b/Doc/library/string.templatelib.rst new file mode 100644 index 00000000000000..31b90d75f411f0 --- /dev/null +++ b/Doc/library/string.templatelib.rst @@ -0,0 +1,313 @@ +:mod:`!string.templatelib` --- Support for template string literals +=================================================================== + +.. module:: string.templatelib + :synopsis: Support for template string literals. + +**Source code:** :source:`Lib/string/templatelib.py` + +-------------- + +.. 
seealso:: + + * :ref:`Format strings ` + * :ref:`T-string literal syntax ` + + +.. _template-strings: + +Template strings +---------------- + +.. versionadded:: 3.14 + +Template strings are a formatting mechanism that allows for deep control over +how strings are processed. You can create templates using +:ref:`t-string literal syntax `, which is identical to +:ref:`f-string syntax ` but uses a ``t`` instead of an ``f``. +While f-strings evaluate to ``str``, t-strings create a :class:`Template` +instance that gives you access to the static and interpolated (in curly braces) +parts of a string *before* they are combined. + + +.. _templatelib-template: + +Template +-------- + +The :class:`!Template` class describes the contents of a template string. + +:class:`!Template` instances are immutable: their attributes cannot be +reassigned. + +.. class:: Template(*args) + + Create a new :class:`!Template` object. + + :param args: A mix of strings and :class:`Interpolation` instances in any order. + :type args: str | Interpolation + + The most common way to create a :class:`!Template` instance is to use the + :ref:`t-string literal syntax `. This syntax is identical to that of + :ref:`f-strings ` except that it uses a ``t`` instead of an ``f``: + + >>> name = "World" + >>> template = t"Hello {name}!" + >>> type(template) + + + Templates are stored as sequences of literal :attr:`~Template.strings` + and dynamic :attr:`~Template.interpolations`. + A :attr:`~Template.values` attribute holds the interpolation values: + + >>> template.strings + ('Hello ', '!') + >>> template.interpolations + (Interpolation('World', ...),) + >>> template.values + ('World',) + + The :attr:`!strings` tuple has one more element than :attr:`!interpolations` + and :attr:`!values`; the interpolations “belong” between the strings. 
+ This may be easier to understand when tuples are aligned:: + + template.strings: ('Hello ', '!') + template.values: ( 'World', ) + + While literal syntax is the most common way to create :class:`!Template` + instances, it is also possible to create them directly using the constructor: + + >>> from string.templatelib import Interpolation, Template + >>> name = "World" + >>> template = Template("Hello, ", Interpolation(name, "name"), "!") + >>> list(template) + ['Hello, ', Interpolation('World', 'name', None, ''), '!'] + + If two or more consecutive strings are passed, they will be concatenated + into a single value in the :attr:`~Template.strings` attribute. For example, + the following code creates a :class:`Template` with a single final string: + + >>> from string.templatelib import Template + >>> template = Template("Hello ", "World", "!") + >>> template.strings + ('Hello World!',) + + If two or more consecutive interpolations are passed, they will be treated + as separate interpolations and an empty string will be inserted between them. + For example, the following code creates a template with empty placeholders + in the :attr:`~Template.strings` attribute: + + >>> from string.templatelib import Interpolation, Template + >>> template = Template(Interpolation("World", "name"), Interpolation("!", "punctuation")) + >>> template.strings + ('', '', '') + + .. attribute:: strings + :type: tuple[str, ...] + + A :ref:`tuple ` of the static strings in the template. + + >>> name = "World" + >>> t"Hello {name}!".strings + ('Hello ', '!') + + Empty strings *are* included in the tuple: + + >>> name = "World" + >>> t"Hello {name}{name}!".strings + ('Hello ', '', '!') + + The ``strings`` tuple is never empty, and always contains one more + string than the ``interpolations`` and ``values`` tuples: + + >>> t"".strings + ('',) + >>> t"".values + () + >>> t"{'cheese'}".strings + ('', '') + >>> t"{'cheese'}".values + ('cheese',) + + .. 
attribute:: interpolations + :type: tuple[Interpolation, ...] + + A tuple of the interpolations in the template. + + >>> name = "World" + >>> t"Hello {name}!".interpolations + (Interpolation('World', 'name', None, ''),) + + The ``interpolations`` tuple may be empty and always contains one fewer + element than the ``strings`` tuple: + + >>> t"Hello!".interpolations + () + + .. attribute:: values + :type: tuple[Any, ...] + + A tuple of all interpolated values in the template. + + >>> name = "World" + >>> t"Hello {name}!".values + ('World',) + + The ``values`` tuple always has the same length as the + ``interpolations`` tuple. It is equivalent to + ``tuple(i.value for i in template.interpolations)``. + + .. describe:: iter(template) + + Iterate over the template, yielding each string and + :class:`Interpolation` in order. + + >>> name = "World" + >>> list(t"Hello {name}!") + ['Hello ', Interpolation('World', 'name', None, ''), '!'] + + Empty strings are *not* included in the iteration: + + >>> name = "World" + >>> list(t"Hello {name}{name}") + ['Hello ', Interpolation('World', 'name', None, ''), Interpolation('World', 'name', None, '')] + + .. describe:: template + other + template += other + + Concatenate this template with another, returning a new + :class:`!Template` instance: + + >>> name = "World" + >>> list(t"Hello " + t"there {name}!") + ['Hello there ', Interpolation('World', 'name', None, ''), '!'] + + Concatenation between a :class:`!Template` and a ``str`` is *not* supported. + This is because it is ambiguous whether the string should be treated as + a static string or an interpolation. 
If you want to concatenate a + :class:`!Template` with a string, you should either wrap the string + directly in a :class:`!Template` (to treat it as a static string) or use + an :class:`!Interpolation` (to treat it as dynamic): + + >>> from string.templatelib import Template, Interpolation + >>> template = t"Hello " + >>> # Treat "there " as a static string + >>> template += Template("there ") + >>> # Treat name as an interpolation + >>> name = "World" + >>> template += Template(Interpolation(name, "name")) + >>> list(template) + ['Hello there ', Interpolation('World', 'name', None, '')] + + +.. class:: Interpolation(value, expression="", conversion=None, format_spec="") + + Create a new :class:`!Interpolation` object. + + :param value: The evaluated, in-scope result of the interpolation. + :type value: object + + :param expression: The text of a valid Python expression, or an empty string. + :type expression: str + + :param conversion: The optional :ref:`conversion ` to be used, one of r, s, and a. + :type conversion: ``Literal["a", "r", "s"] | None`` + + :param format_spec: An optional, arbitrary string used as the :ref:`format specification ` to present the value. + :type format_spec: str + + The :class:`!Interpolation` type represents an expression inside a template string. + + :class:`!Interpolation` instances are immutable: their attributes cannot be + reassigned. + + .. attribute:: value + + :returns: The evaluated value of the interpolation. + :type: object + + >>> t"{1 + 2}".interpolations[0].value + 3 + + .. attribute:: expression + + :returns: The text of a valid Python expression, or an empty string. + :type: str + + The :attr:`~Interpolation.expression` is the original text of the + interpolation's Python expression, if the interpolation was created + from a t-string literal. Developers creating interpolations manually + should either set this to an empty string or choose a suitable valid + Python expression. 
+ + >>> t"{1 + 2}".interpolations[0].expression + '1 + 2' + + .. attribute:: conversion + + :returns: The conversion to apply to the value, or ``None``. + :type: ``Literal["a", "r", "s"] | None`` + + The :attr:`!Interpolation.conversion` is the optional conversion to apply + to the value: + + >>> t"{1 + 2!a}".interpolations[0].conversion + 'a' + + .. note:: + + Unlike f-strings, where conversions are applied automatically, + the expected behavior with t-strings is that code that *processes* the + :class:`!Template` will decide how to interpret and whether to apply + the :attr:`!Interpolation.conversion`. + + .. attribute:: format_spec + + :returns: The format specification to apply to the value. + :type: str + + The :attr:`!Interpolation.format_spec` is an optional, arbitrary string + used as the format specification to present the value: + + >>> t"{1 + 2:.2f}".interpolations[0].format_spec + '.2f' + + .. note:: + + Unlike f-strings, where format specifications are applied automatically + via the :func:`format` protocol, the expected behavior with + t-strings is that code that *processes* the :class:`!Template` will + decide how to interpret and whether to apply the format specification. + As a result, :attr:`!Interpolation.format_spec` values in + :class:`!Template` instances can be arbitrary strings, even those that + do not necessarily conform to the rules of Python's :func:`format` + protocol. + + Interpolations support pattern matching, allowing you to match against + their attributes with the :ref:`match statement `: + + >>> from string.templatelib import Interpolation + >>> interpolation = Interpolation(3.0, "1 + 2", None, ".2f") + >>> match interpolation: + ... case Interpolation(value, expression, conversion, format_spec): + ... print(value, expression, conversion, format_spec) + ... + 3.0 1 + 2 None .2f + + +Helper functions +---------------- + +.. 
function:: convert(obj, /, conversion) + + Applies formatted string literal :ref:`conversion ` + semantics to the given object *obj*. + This is frequently useful for custom template string processing logic. + + Three conversion flags are currently supported: + + * ``'s'`` which calls :func:`str` on the value, + * ``'r'`` which calls :func:`repr`, and + * ``'a'`` which calls :func:`ascii`. + + If the conversion flag is ``None``, *obj* is returned unchanged. diff --git a/Doc/library/text.rst b/Doc/library/text.rst index 47b678434fc899..92e7dd9a53b80d 100644 --- a/Doc/library/text.rst +++ b/Doc/library/text.rst @@ -16,6 +16,7 @@ Python's built-in string type in :ref:`textseq`. .. toctree:: string.rst + string.templatelib.rst re.rst difflib.rst textwrap.rst diff --git a/Doc/reference/compound_stmts.rst b/Doc/reference/compound_stmts.rst index e95fa3a6424e23..a416cbb4cc8eab 100644 --- a/Doc/reference/compound_stmts.rst +++ b/Doc/reference/compound_stmts.rst @@ -852,8 +852,8 @@ A literal pattern corresponds to most The rule ``strings`` and the token ``NUMBER`` are defined in the :doc:`standard Python grammar <./grammar>`. Triple-quoted strings are -supported. Raw strings and byte strings are supported. :ref:`f-strings` are -not supported. +supported. Raw strings and byte strings are supported. :ref:`f-strings` +and :ref:`t-strings` are not supported. The forms ``signed_number '+' NUMBER`` and ``signed_number '-' NUMBER`` are for expressing :ref:`complex numbers `; they require a real number diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index 567c70111c20ec..a7f8e5392b7e71 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -561,9 +561,9 @@ escapes are not treated specially. single: f'; formatted string literal single: f"; formatted string literal -A string literal with ``'f'`` or ``'F'`` in its prefix is a -:dfn:`formatted string literal`; see :ref:`f-strings`. 
The ``'f'`` may be -combined with ``'r'``, but not with ``'b'`` or ``'u'``, therefore raw +A string literal with ``f`` or ``F`` in its prefix is a +:dfn:`formatted string literal`; see :ref:`f-strings`. The ``f`` may be +combined with ``r``, but not with ``b`` or ``u``, therefore raw formatted strings are possible, but formatted bytes literals are not. In triple-quoted literals, unescaped newlines and quotes are allowed (and are @@ -756,7 +756,7 @@ f-strings .. versionadded:: 3.6 A :dfn:`formatted string literal` or :dfn:`f-string` is a string literal -that is prefixed with ``'f'`` or ``'F'``. These strings may contain +that is prefixed with ``f`` or ``F``. These strings may contain replacement fields, which are expressions delimited by curly braces ``{}``. While other string literals always have a constant value, formatted strings are really expressions evaluated at run time. @@ -913,6 +913,48 @@ See also :pep:`498` for the proposal that added formatted string literals, and :meth:`str.format`, which uses a related format string mechanism. +.. _t-strings: +.. _template-string-literals: + +t-strings +--------- + +.. versionadded:: 3.14 + +A :dfn:`template string literal` or :dfn:`t-string` is a string literal +that is prefixed with ``t`` or ``T``. These strings follow the same +syntax and evaluation rules as :ref:`formatted string literals `, with +the following differences: + +- Rather than evaluating to a ``str`` object, t-strings evaluate to a + :class:`~string.templatelib.Template` object from the + :mod:`string.templatelib` module. + +- The :func:`format` protocol is not used. Instead, the format specifier and + conversions (if any) are passed to a new :class:`~string.templatelib.Interpolation` + object that is created for each evaluated expression. It is up to code that + processes the resulting :class:`~string.templatelib.Template` object to + decide how to handle format specifiers and conversions. 
+ +- Format specifiers containing nested replacement fields are evaluated eagerly, + prior to being passed to the :class:`~string.templatelib.Interpolation` object. + For instance, an interpolation of the form ``{amount:.{precision}f}`` will + evaluate the expression ``{precision}`` before setting the ``format_spec`` + attribute of the resulting :class:`!Interpolation` object; if ``precision`` + is (for example) ``2``, the resulting format specifier will be ``'.2f'``. + +- When the equal sign ``'='`` is provided in an interpolation expression, the + resulting :class:`~string.templatelib.Template` object will have the expression + text along with a ``'='`` character placed in its + :attr:`~string.templatelib.Template.strings` attribute. The + :attr:`~string.templatelib.Template.interpolations` attribute will also + contain an ``Interpolation`` instance for the expression. By default, the + :attr:`~string.templatelib.Interpolation.conversion` attribute will be set to + ``'r'`` (that is, :func:`repr`), unless there is a conversion explicitly + specified (in which case it overrides the default) or a format specifier is + provided (in which case, the ``conversion`` defaults to ``None``). + + .. _numbers: Numeric literals diff --git a/Doc/tutorial/inputoutput.rst b/Doc/tutorial/inputoutput.rst index 35b8c7cd8eb049..ea546c6a29df44 100644 --- a/Doc/tutorial/inputoutput.rst +++ b/Doc/tutorial/inputoutput.rst @@ -95,10 +95,11 @@ Some examples:: >>> repr((x, y, ('spam', 'eggs'))) "(32.5, 40000, ('spam', 'eggs'))" -The :mod:`string` module contains a :class:`~string.Template` class that offers -yet another way to substitute values into strings, using placeholders like -``$x`` and replacing them with values from a dictionary, but offers much less -control of the formatting. 
+The :mod:`string` module also contains support for so-called +:ref:`$-strings ` that offer yet another way to +substitute values into strings, using placeholders like ``$x`` and replacing +them with values from a dictionary. This syntax is easy to use, although +it offers much less control of the formatting. .. index:: single: formatted string literal diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 6bdf4aee709ad3..e45a2bfa485c50 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -2940,11 +2940,6 @@ New features file. (Contributed by Victor Stinner in :gh:`127350`.) -* Add support of nullable arguments in :c:func:`PyArg_ParseTuple` and - similar functions. - Adding ``?`` after any format unit makes ``None`` be accepted as a value. - (Contributed by Serhiy Storchaka in :gh:`112068`.) - * The ``k`` and ``K`` formats in :c:func:`PyArg_ParseTuple` and similar functions now use :meth:`~object.__index__` if available, like all other integer formats. diff --git a/Lib/_pyrepl/trace.py b/Lib/_pyrepl/trace.py index a8eb2433cd3cce..943ee12f964b29 100644 --- a/Lib/_pyrepl/trace.py +++ b/Lib/_pyrepl/trace.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import sys # types if False: @@ -12,10 +13,22 @@ trace_file = open(trace_filename, "a") -def trace(line: str, *k: object, **kw: object) -> None: - if trace_file is None: - return - if k or kw: - line = line.format(*k, **kw) - trace_file.write(line + "\n") - trace_file.flush() + +if sys.platform == "emscripten": + from posix import _emscripten_log + + def trace(line: str, *k: object, **kw: object) -> None: + if "PYREPL_TRACE" not in os.environ: + return + if k or kw: + line = line.format(*k, **kw) + _emscripten_log(line) + +else: + def trace(line: str, *k: object, **kw: object) -> None: + if trace_file is None: + return + if k or kw: + line = line.format(*k, **kw) + trace_file.write(line + "\n") + trace_file.flush() diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 
7eea885cfe63c5..5d03c98df5cdd0 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -128,6 +128,7 @@ class HTMLParser(_markupbase.ParserBase): """ CDATA_CONTENT_ELEMENTS = ("script", "style") + RCDATA_CONTENT_ELEMENTS = ("textarea", "title") def __init__(self, *, convert_charrefs=True): """Initialize and reset this instance. @@ -145,6 +146,7 @@ def reset(self): self.lasttag = '???' self.interesting = interesting_normal self.cdata_elem = None + self._escapable = True super().reset() def feed(self, data): @@ -166,14 +168,20 @@ def get_starttag_text(self): """Return full source of start tag: '<...>'.""" return self.__starttag_text - def set_cdata_mode(self, elem): + def set_cdata_mode(self, elem, *, escapable=False): self.cdata_elem = elem.lower() - self.interesting = re.compile(r'])' % self.cdata_elem, - re.IGNORECASE|re.ASCII) + self._escapable = escapable + if escapable and not self.convert_charrefs: + self.interesting = re.compile(r'&|])' % self.cdata_elem, + re.IGNORECASE|re.ASCII) + else: + self.interesting = re.compile(r'])' % self.cdata_elem, + re.IGNORECASE|re.ASCII) def clear_cdata_mode(self): self.interesting = interesting_normal self.cdata_elem = None + self._escapable = True # Internal -- handle data as far as reasonable. May leave state # and data to be processed by a subsequent call. 
If 'end' is @@ -206,7 +214,7 @@ def goahead(self, end): break j = n if i < j: - if self.convert_charrefs and not self.cdata_elem: + if self.convert_charrefs and self._escapable: self.handle_data(unescape(rawdata[i:j])) else: self.handle_data(rawdata[i:j]) @@ -308,7 +316,7 @@ def goahead(self, end): assert 0, "interesting.search() lied" # end while if end and i < n: - if self.convert_charrefs and not self.cdata_elem: + if self.convert_charrefs and self._escapable: self.handle_data(unescape(rawdata[i:n])) else: self.handle_data(rawdata[i:n]) @@ -420,6 +428,8 @@ def parse_starttag(self, i): self.handle_starttag(tag, attrs) if tag in self.CDATA_CONTENT_ELEMENTS: self.set_cdata_mode(tag) + elif tag in self.RCDATA_CONTENT_ELEMENTS: + self.set_cdata_mode(tag, escapable=True) return endpos # Internal -- check to see if we have a complete starttag; return end diff --git a/Lib/test/test_build_details.py b/Lib/test/test_build_details.py index ba4b8c5aa9b58e..691fd0bb98c097 100644 --- a/Lib/test/test_build_details.py +++ b/Lib/test/test_build_details.py @@ -124,6 +124,10 @@ def test_location(self): def test_base_interpreter(self): value = self.key('base_interpreter') + # Skip check if installation is relocated + if sysconfig._installation_is_relocated(): + self.skipTest("Installation is relocated") + self.assertEqual(os.path.realpath(value), os.path.realpath(sys.executable)) @needs_installed_python @@ -133,6 +137,11 @@ def test_base_interpreter(self): ) def test_c_api(self): value = self.key('c_api') + + # Skip check if installation is relocated + if sysconfig._installation_is_relocated(): + self.skipTest("Installation is relocated") + self.assertTrue(os.path.exists(os.path.join(value['headers'], 'Python.h'))) version = sysconfig.get_config_var('VERSION') self.assertTrue(os.path.exists(os.path.join(value['pkgconfig_path'], f'python-{version}.pc'))) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index dc2000089684a6..0b2473bac2be11 100644 
--- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1429,123 +1429,6 @@ def test_nested_sequence(self): "argument 1 must be sequence of length 1, not 0"): parse(([],), {}, '(' + f + ')', ['a']) - def test_specific_type_errors(self): - parse = _testcapi.parse_tuple_and_keywords - - def check(format, arg, expected, got='list'): - errmsg = f'must be {expected}, not {got}' - with self.assertRaisesRegex(TypeError, errmsg): - parse((arg,), {}, format, ['a']) - - check('k', [], 'int') - check('k?', [], 'int or None') - check('K', [], 'int') - check('K?', [], 'int or None') - check('c', [], 'a byte string of length 1') - check('c?', [], 'a byte string of length 1 or None') - check('c', b'abc', 'a byte string of length 1', - 'a bytes object of length 3') - check('c?', b'abc', 'a byte string of length 1 or None', - 'a bytes object of length 3') - check('c', bytearray(b'abc'), 'a byte string of length 1', - 'a bytearray object of length 3') - check('c?', bytearray(b'abc'), 'a byte string of length 1 or None', - 'a bytearray object of length 3') - check('C', [], 'a unicode character') - check('C?', [], 'a unicode character or None') - check('C', 'abc', 'a unicode character', - 'a string of length 3') - check('C?', 'abc', 'a unicode character or None', - 'a string of length 3') - check('s', [], 'str') - check('s?', [], 'str or None') - check('z', [], 'str or None') - check('z?', [], 'str or None') - check('es', [], 'str') - check('es?', [], 'str or None') - check('es#', [], 'str') - check('es#?', [], 'str or None') - check('et', [], 'str, bytes or bytearray') - check('et?', [], 'str, bytes, bytearray or None') - check('et#', [], 'str, bytes or bytearray') - check('et#?', [], 'str, bytes, bytearray or None') - check('w*', [], 'read-write bytes-like object') - check('w*?', [], 'read-write bytes-like object or None') - check('S', [], 'bytes') - check('S?', [], 'bytes or None') - check('U', [], 'str') - check('U?', [], 'str or None') - check('Y', 
[], 'bytearray') - check('Y?', [], 'bytearray or None') - check('(OO)', 42, '2-item tuple', 'int') - check('(OO)?', 42, '2-item tuple or None', 'int') - check('(OO)', (1, 2, 3), 'tuple of length 2', '3') - - def test_nullable(self): - parse = _testcapi.parse_tuple_and_keywords - - def check(format, arg, allows_none=False): - # Because some format units (such as y*) require cleanup, - # we force the parsing code to perform the cleanup by adding - # an argument that always fails. - # By checking for an exception, we ensure that the parsing - # of the first argument was successful. - self.assertRaises(OverflowError, parse, - (arg, 256), {}, format + '?b', ['a', 'b']) - self.assertRaises(OverflowError, parse, - (None, 256), {}, format + '?b', ['a', 'b']) - self.assertRaises(OverflowError, parse, - (arg, 256), {}, format + 'b', ['a', 'b']) - self.assertRaises(OverflowError if allows_none else TypeError, parse, - (None, 256), {}, format + 'b', ['a', 'b']) - - check('b', 42) - check('B', 42) - check('h', 42) - check('H', 42) - check('i', 42) - check('I', 42) - check('n', 42) - check('l', 42) - check('k', 42) - check('L', 42) - check('K', 42) - check('f', 2.5) - check('d', 2.5) - check('D', 2.5j) - check('c', b'a') - check('C', 'a') - check('p', True, allows_none=True) - check('y', b'buffer') - check('y*', b'buffer') - check('y#', b'buffer') - check('s', 'string') - check('s*', 'string') - check('s#', 'string') - check('z', 'string', allows_none=True) - check('z*', 'string', allows_none=True) - check('z#', 'string', allows_none=True) - check('w*', bytearray(b'buffer')) - check('U', 'string') - check('S', b'bytes') - check('Y', bytearray(b'bytearray')) - check('O', object, allows_none=True) - - check('(OO)', (1, 2)) - self.assertEqual(parse((((1, 2), 3),), {}, '((OO)?O)', ['a']), (1, 2, 3)) - self.assertEqual(parse(((None, 3),), {}, '((OO)?O)', ['a']), (NULL, NULL, 3)) - self.assertEqual(parse((((1, 2), 3),), {}, '((OO)O)', ['a']), (1, 2, 3)) - self.assertRaises(TypeError, 
parse, ((None, 3),), {}, '((OO)O)', ['a']) - - parse((None,), {}, 'es?', ['a']) - parse((None,), {}, 'es#?', ['a']) - parse((None,), {}, 'et?', ['a']) - parse((None,), {}, 'et#?', ['a']) - parse((None,), {}, 'O!?', ['a']) - parse((None,), {}, 'O&?', ['a']) - - # TODO: More tests for es?, es#?, et?, et#?, O!, O& - @unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi') def test_gh_119213(self): rc, out, err = script_helper.assert_python_ok("-c", """if True: diff --git a/Lib/test/test_fcntl.py b/Lib/test/test_fcntl.py index 7140a7b4f29188..222b69a6d250cd 100644 --- a/Lib/test/test_fcntl.py +++ b/Lib/test/test_fcntl.py @@ -8,7 +8,7 @@ import sys import unittest from test.support import ( - cpython_only, get_pagesize, is_apple, requires_subprocess, verbose + cpython_only, get_pagesize, is_apple, requires_subprocess, verbose, is_emscripten ) from test.support.import_helper import import_module from test.support.os_helper import TESTFN, unlink, make_bad_fd @@ -211,6 +211,7 @@ def test_fcntl_f_getpath(self): @unittest.skipUnless( hasattr(fcntl, "F_SETPIPE_SZ") and hasattr(fcntl, "F_GETPIPE_SZ"), "F_SETPIPE_SZ and F_GETPIPE_SZ are not available on all platforms.") + @unittest.skipIf(is_emscripten, "Emscripten pipefs doesn't support these") def test_fcntl_f_pipesize(self): test_pipe_r, test_pipe_w = os.pipe() try: @@ -265,12 +266,14 @@ def _check_fcntl_not_mutate_len(self, nbytes=None): @unittest.skipUnless( hasattr(fcntl, "F_SETOWN_EX") and hasattr(fcntl, "F_GETOWN_EX"), "requires F_SETOWN_EX and F_GETOWN_EX") + @unittest.skipIf(is_emscripten, "Emscripten doesn't actually support these") def test_fcntl_small_buffer(self): self._check_fcntl_not_mutate_len() @unittest.skipUnless( hasattr(fcntl, "F_SETOWN_EX") and hasattr(fcntl, "F_GETOWN_EX"), "requires F_SETOWN_EX and F_GETOWN_EX") + @unittest.skipIf(is_emscripten, "Emscripten doesn't actually support these") def test_fcntl_large_buffer(self): self._check_fcntl_not_mutate_len(2024) diff --git 
a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 47c0752fb517b9..380bbe40177ec5 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -317,6 +317,49 @@ def test_style_content(self, content): ("data", content), ("endtag", "style")]) + @support.subTests('content', [ + '', + "", + '', + '', + '', + '\u2603', + '< /title>', + '', + '', + '', + '', + '', + ]) + def test_title_content(self, content): + source = f"{content}" + self._run_check(source, [ + ("starttag", "title", []), + ("data", content), + ("endtag", "title"), + ]) + + @support.subTests('content', [ + '', + "", + '', + '', + '', + '\u2603', + '< /textarea>', + '', + '', + '', + '', + ]) + def test_textarea_content(self, content): + source = f"" + self._run_check(source, [ + ("starttag", "textarea", []), + ("data", content), + ("endtag", "textarea"), + ]) + @support.subTests('endtag', ['script', 'SCRIPT', 'script ', 'script\n', 'script/', 'script foo=bar', 'script foo=">"']) def test_script_closing_tag(self, endtag): @@ -346,6 +389,38 @@ def test_style_closing_tag(self, endtag): ("endtag", "style")], collector=EventCollectorNoNormalize(convert_charrefs=False)) + @support.subTests('endtag', ['title', 'TITLE', 'title ', 'title\n', + 'title/', 'title foo=bar', 'title foo=">"']) + def test_title_closing_tag(self, endtag): + content = "Egg & Spam" + s = f'{content}</{endtag}>' + self._run_check(s, [("starttag", "title", []), + ('data', '<!-- not a comment --><i>Egg & Spam</i>'), + ("endtag", "title")], + collector=EventCollectorNoNormalize(convert_charrefs=True)) + self._run_check(s, [("starttag", "title", []), + ('data', '<!-- not a comment --><i>Egg '), + ('entityref', 'amp'), + ('data', ' Spam</i>'), + ("endtag", "title")], + collector=EventCollectorNoNormalize(convert_charrefs=False)) + + @support.subTests('endtag', ['textarea', 'TEXTAREA', 'textarea ', 'textarea\n', + 'textarea/', 'textarea foo=bar', 'textarea foo=">"']) + def test_textarea_closing_tag(self, endtag): + 
content = "<!-- not a comment --><i>Egg & Spam</i>" + s = f'<TexTarEa>{content}</{endtag}>' + self._run_check(s, [("starttag", "textarea", []), + ('data', '<!-- not a comment --><i>Egg & Spam</i>'), + ("endtag", "textarea")], + collector=EventCollectorNoNormalize(convert_charrefs=True)) + self._run_check(s, [("starttag", "textarea", []), + ('data', '<!-- not a comment --><i>Egg '), + ('entityref', 'amp'), + ('data', ' Spam</i>'), + ("endtag", "textarea")], + collector=EventCollectorNoNormalize(convert_charrefs=False)) + @support.subTests('tail,end', [ ('', False), ('<', False), @@ -363,6 +438,27 @@ def test_eof_in_script(self, tail, end): ("data", content if end else content + tail)], collector=EventCollectorNoNormalize(convert_charrefs=False)) + @support.subTests('tail,end', [ + ('', False), + ('<', False), + ('</', False), + ('</t', False), + ('</title', False), + ('" '' diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index fd4197b7086976..b2a299ed172967 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -732,7 +732,7 @@ def test_tagname(self): m2.close() m1.close() - with self.assertRaisesRegex(TypeError, 'must be str or None'): + with self.assertRaisesRegex(TypeError, 'tagname'): mmap.mmap(-1, 8, tagname=1) @cpython_only diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index 2eb8de4b29fe96..4aaef5b142931e 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -708,7 +708,7 @@ def test_sysconfigdata_json(self): ignore_keys |= {'prefix', 'exec_prefix', 'base', 'platbase'} # Keys dependent on Python being run from the prefix targetted when building (different on relocatable installs) if sysconfig._installation_is_relocated(): - ignore_keys |= {'prefix', 'exec_prefix', 'base', 'platbase', 'installed_base', 'installed_platbase'} + ignore_keys |= {'prefix', 'exec_prefix', 'base', 'platbase', 'installed_base', 'installed_platbase', 'srcdir'} for key in ignore_keys: json_config_vars.pop(key, None) diff 
--git a/Makefile.pre.in b/Makefile.pre.in index 959ccb891f283c..fa17f5d7bfc0ac 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -804,7 +804,7 @@ build_wasm: check-clean-src $(BUILDPYTHON) platform sharedmods \ python-config checksharedmods .PHONY: build_emscripten -build_emscripten: build_wasm web_example +build_emscripten: build_wasm web_example web_example_pyrepl_jspi # Check that the source is clean when building out of source. .PHONY: check-clean-src @@ -1095,26 +1095,28 @@ $(DLLLIBRARY) libpython$(LDVERSION).dll.a: $(LIBRARY_OBJS) # wasm32-emscripten browser web example -WEBEX_DIR=$(srcdir)/Tools/wasm/emscripten/web_example/ +EMSCRIPTEN_DIR=$(srcdir)/Tools/wasm/emscripten +WEBEX_DIR=$(EMSCRIPTEN_DIR)/web_example/ + +ZIP_STDLIB=python$(VERSION)$(ABI_THREAD).zip +$(ZIP_STDLIB): $(srcdir)/Lib/*.py $(srcdir)/Lib/*/*.py \ + $(EMSCRIPTEN_DIR)/wasm_assets.py \ + Makefile pybuilddir.txt Modules/Setup.local + $(PYTHON_FOR_BUILD) $(EMSCRIPTEN_DIR)/wasm_assets.py \ + --buildroot . --prefix $(prefix) -o $@ + web_example/index.html: $(WEBEX_DIR)/index.html @mkdir -p web_example @cp $< $@ -web_example/python.worker.mjs: $(WEBEX_DIR)/python.worker.mjs +web_example/server.py: $(WEBEX_DIR)/server.py @mkdir -p web_example @cp $< $@ -web_example/server.py: $(WEBEX_DIR)/server.py +web_example/$(ZIP_STDLIB): $(ZIP_STDLIB) @mkdir -p web_example @cp $< $@ -WEB_STDLIB=web_example/python$(VERSION)$(ABI_THREAD).zip -$(WEB_STDLIB): $(srcdir)/Lib/*.py $(srcdir)/Lib/*/*.py \ - $(WEBEX_DIR)/wasm_assets.py \ - Makefile pybuilddir.txt Modules/Setup.local - $(PYTHON_FOR_BUILD) $(WEBEX_DIR)/wasm_assets.py \ - --buildroot . 
--prefix $(prefix) -o $@ - web_example/python.mjs web_example/python.wasm: $(BUILDPYTHON) @if test $(HOST_GNU_TYPE) != 'wasm32-unknown-emscripten' ; then \ echo "Can only build web_example when target is Emscripten" ;\ @@ -1124,7 +1126,35 @@ web_example/python.mjs web_example/python.wasm: $(BUILDPYTHON) cp python.wasm web_example/python.wasm .PHONY: web_example -web_example: web_example/python.mjs web_example/python.worker.mjs web_example/index.html web_example/server.py $(WEB_STDLIB) +web_example: web_example/python.mjs web_example/index.html web_example/server.py web_example/$(ZIP_STDLIB) + +WEBEX2=web_example_pyrepl_jspi +WEBEX2_DIR=$(EMSCRIPTEN_DIR)/$(WEBEX2)/ + +$(WEBEX2)/python.mjs $(WEBEX2)/python.wasm: $(BUILDPYTHON) + @if test $(HOST_GNU_TYPE) != 'wasm32-unknown-emscripten' ; then \ + echo "Can only build web_example when target is Emscripten" ;\ + exit 1 ;\ + fi + @mkdir -p $(WEBEX2) + @cp python.mjs $(WEBEX2)/python.mjs + @cp python.wasm $(WEBEX2)/python.wasm + +$(WEBEX2)/index.html: $(WEBEX2_DIR)/index.html + @mkdir -p $(WEBEX2) + @cp $< $@ + +$(WEBEX2)/src.mjs: $(WEBEX2_DIR)/src.mjs + @mkdir -p $(WEBEX2) + @cp $< $@ + +$(WEBEX2)/$(ZIP_STDLIB): $(ZIP_STDLIB) + @mkdir -p $(WEBEX2) + @cp $< $@ + +.PHONY: web_example_pyrepl_jspi +web_example_pyrepl_jspi: $(WEBEX2)/python.mjs $(WEBEX2)/index.html $(WEBEX2)/src.mjs $(WEBEX2)/$(ZIP_STDLIB) + ############################################################################ # Header files diff --git a/Misc/NEWS.d/3.14.0b1.rst b/Misc/NEWS.d/3.14.0b1.rst index 02ceb82b556386..5d03d429f9ee14 100644 --- a/Misc/NEWS.d/3.14.0b1.rst +++ b/Misc/NEWS.d/3.14.0b1.rst @@ -2012,7 +2012,7 @@ interpreter. .. nonce: ofI5Fl .. section: C API -Add support of nullable arguments in :c:func:`PyArg_Parse` and similar +[Reverted in :gh:`136991`] Add support of nullable arguments in :c:func:`PyArg_Parse` and similar functions. Adding ``?`` after any format unit makes ``None`` be accepted as a value. 
diff --git a/Misc/NEWS.d/next/C_API/2025-07-22-15-18-08.gh-issue-112068.4WvT-8.rst b/Misc/NEWS.d/next/C_API/2025-07-22-15-18-08.gh-issue-112068.4WvT-8.rst new file mode 100644 index 00000000000000..018c5c7880c001 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-07-22-15-18-08.gh-issue-112068.4WvT-8.rst @@ -0,0 +1 @@ +Revert support of nullable arguments in :c:func:`PyArg_Parse`. diff --git a/Misc/NEWS.d/next/Library/2025-07-21-16-10-24.gh-issue-124621.wyoWc1.rst b/Misc/NEWS.d/next/Library/2025-07-21-16-10-24.gh-issue-124621.wyoWc1.rst new file mode 100644 index 00000000000000..34049183649271 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-21-16-10-24.gh-issue-124621.wyoWc1.rst @@ -0,0 +1 @@ +pyrepl now works in Emscripten. diff --git a/Misc/NEWS.d/next/Security/2025-06-09-20-38-25.gh-issue-118350.KgWCcP.rst b/Misc/NEWS.d/next/Security/2025-06-09-20-38-25.gh-issue-118350.KgWCcP.rst new file mode 100644 index 00000000000000..6ad3caf33b2201 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-06-09-20-38-25.gh-issue-118350.KgWCcP.rst @@ -0,0 +1,2 @@ +Fix support of escapable raw text mode (elements "textarea" and "title") +in :class:`html.parser.HTMLParser`. 
diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 7e8a133caa72ac..4bd3e380b3bc4b 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -3937,7 +3937,9 @@ _validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags) PyObject *name = Py_None; PyObject *defval; PyObject *typ; - if (!PyArg_ParseTuple(item, "i|U?O", &flag, &name, &defval)) { + if (!PyArg_ParseTuple(item, "i|OO", &flag, &name, &defval) || + !(name == Py_None || PyUnicode_Check(name))) + { PyErr_SetString(PyExc_TypeError, "paramflags must be a sequence of (int [,string [,value]]) tuples"); return 0; @@ -4002,8 +4004,10 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds) void *handle; PyObject *paramflags = NULL; - if (!PyArg_ParseTuple(args, "O|O?", &ftuple, ¶mflags)) + if (!PyArg_ParseTuple(args, "O|O", &ftuple, ¶mflags)) return NULL; + if (paramflags == Py_None) + paramflags = NULL; ftuple = PySequence_Tuple(ftuple); if (!ftuple) @@ -4135,8 +4139,10 @@ PyCFuncPtr_FromVtblIndex(PyTypeObject *type, PyObject *args, PyObject *kwds) GUID *iid = NULL; Py_ssize_t iid_len = 0; - if (!PyArg_ParseTuple(args, "is|O?z#", &index, &name, ¶mflags, &iid, &iid_len)) + if (!PyArg_ParseTuple(args, "is|Oz#", &index, &name, ¶mflags, &iid, &iid_len)) return NULL; + if (paramflags == Py_None) + paramflags = NULL; ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); if (!_validate_paramflags(st, type, paramflags)) { diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index 9426ce72733c28..faf3b25b68c4eb 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -1415,11 +1415,14 @@ interp_get_config(PyObject *self, PyObject *args, PyObject *kwds) PyObject *idobj = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "O?|$p:get_config", kwlist, + "O|$p:get_config", kwlist, &idobj, &restricted)) { return NULL; } + if (idobj == Py_None) { + idobj = NULL; + } int reqready = 0; 
PyInterpreterState *interp = \ @@ -1536,14 +1539,14 @@ capture_exception(PyObject *self, PyObject *args, PyObject *kwds) static char *kwlist[] = {"exc", NULL}; PyObject *exc_arg = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "|O?:capture_exception", kwlist, + "|O:capture_exception", kwlist, &exc_arg)) { return NULL; } PyObject *exc = exc_arg; - if (exc == NULL) { + if (exc == NULL || exc == Py_None) { exc = PyErr_GetRaisedException(); if (exc == NULL) { Py_RETURN_NONE; diff --git a/Modules/_json.c b/Modules/_json.c index 6b5f6ea42df4d1..7580b589e2d937 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1222,16 +1222,23 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; PyEncoderObject *s; - PyObject *markers = Py_None, *defaultfn, *encoder, *indent, *key_separator; + PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; PyObject *item_separator; int sort_keys, skipkeys, allow_nan; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!?OOOUUppp:make_encoder", kwlist, - &PyDict_Type, &markers, &defaultfn, &encoder, &indent, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist, + &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &sort_keys, &skipkeys, &allow_nan)) return NULL; + if (markers != Py_None && !PyDict_Check(markers)) { + PyErr_Format(PyExc_TypeError, + "make_encoder() argument 1 must be dict or None, " + "not %.200s", Py_TYPE(markers)->tp_name); + return NULL; + } + s = (PyEncoderObject *)type->tp_alloc(type, 0); if (s == NULL) return NULL; diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 3540fead8e8e74..f82ad6870f850f 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -681,12 +681,12 @@ PyThreadHandleObject_join(PyObject *op, PyObject *args) PyThreadHandleObject *self = 
PyThreadHandleObject_CAST(op); PyObject *timeout_obj = NULL; - if (!PyArg_ParseTuple(args, "|O?:join", &timeout_obj)) { + if (!PyArg_ParseTuple(args, "|O:join", &timeout_obj)) { return NULL; } PyTime_t timeout_ns = -1; - if (timeout_obj != NULL) { + if (timeout_obj != NULL && timeout_obj != Py_None) { if (_PyTime_FromSecondsObject(&timeout_ns, timeout_obj, _PyTime_ROUND_TIMEOUT) < 0) { return NULL; @@ -1957,10 +1957,10 @@ thread_PyThread_start_joinable_thread(PyObject *module, PyObject *fargs, PyObject *func = NULL; int daemon = 1; thread_module_state *state = get_thread_state(module); - PyObject *hobj = Py_None; + PyObject *hobj = NULL; if (!PyArg_ParseTupleAndKeywords(fargs, fkwargs, - "O|O!?p:start_joinable_thread", keywords, - &func, state->thread_handle_type, &hobj, &daemon)) { + "O|Op:start_joinable_thread", keywords, + &func, &hobj, &daemon)) { return NULL; } @@ -1970,6 +1970,14 @@ thread_PyThread_start_joinable_thread(PyObject *module, PyObject *fargs, return NULL; } + if (hobj == NULL) { + hobj = Py_None; + } + else if (hobj != Py_None && !Py_IS_TYPE(hobj, state->thread_handle_type)) { + PyErr_SetString(PyExc_TypeError, "'handle' must be a _ThreadHandle"); + return NULL; + } + if (PySys_Audit("_thread.start_joinable_thread", "OiO", func, daemon, hobj) < 0) { return NULL; diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index d75c0779474a82..8af6156a0da575 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -1,4 +1,4 @@ -/* Low level interface to the Zstandard algorthm & the zstd library. */ +/* Low level interface to the Zstandard algorithm & the zstd library. */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h index 4e8f708f2232c7..82226ff8718e6b 100644 --- a/Modules/_zstd/_zstdmodule.h +++ b/Modules/_zstd/_zstdmodule.h @@ -1,4 +1,4 @@ -/* Low level interface to the Zstandard algorthm & the zstd library. 
*/ +/* Low level interface to the Zstandard algorithm & the zstd library. */ /* Declarations shared between different parts of the _zstd module*/ diff --git a/Modules/_zstd/buffer.h b/Modules/_zstd/buffer.h index 4c885fa0d720fd..0ac7bcb4ddc416 100644 --- a/Modules/_zstd/buffer.h +++ b/Modules/_zstd/buffer.h @@ -1,4 +1,4 @@ -/* Low level interface to the Zstandard algorthm & the zstd library. */ +/* Low level interface to the Zstandard algorithm & the zstd library. */ #ifndef ZSTD_BUFFER_H #define ZSTD_BUFFER_H diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index bc9e6eff89af68..508b136817872b 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -1,4 +1,4 @@ -/* Low level interface to the Zstandard algorthm & the zstd library. */ +/* Low level interface to the Zstandard algorithm & the zstd library. */ /* ZstdCompressor class definitions */ diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index c53d6e4cb05cf0..b00ee05d2f51bf 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -1,4 +1,4 @@ -/* Low level interface to the Zstandard algorthm & the zstd library. */ +/* Low level interface to the Zstandard algorithm & the zstd library. */ /* ZstdDecompressor class definition */ diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c index 14f74aaed46ec5..35d6ca8e55a265 100644 --- a/Modules/_zstd/zstddict.c +++ b/Modules/_zstd/zstddict.c @@ -1,4 +1,4 @@ -/* Low level interface to the Zstandard algorthm & the zstd library. */ +/* Low level interface to the Zstandard algorithm & the zstd library. */ /* ZstdDict class definitions */ diff --git a/Modules/_zstd/zstddict.h b/Modules/_zstd/zstddict.h index 4a403416dbd4a3..e0d3f46b2b14a6 100644 --- a/Modules/_zstd/zstddict.h +++ b/Modules/_zstd/zstddict.h @@ -1,4 +1,4 @@ -/* Low level interface to the Zstandard algorthm & the zstd library. */ +/* Low level interface to the Zstandard algorithm & the zstd library. 
*/ #ifndef ZSTD_DICT_H #define ZSTD_DICT_H diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 65f5f8c9267b6c..0a281cbe6c57a2 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -12769,6 +12769,80 @@ os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) #endif /* defined(__EMSCRIPTEN__) */ +#if defined(__EMSCRIPTEN__) + +PyDoc_STRVAR(os__emscripten_log__doc__, +"_emscripten_log($module, /, arg)\n" +"--\n" +"\n" +"Log something to the JS console. Emscripten only."); + +#define OS__EMSCRIPTEN_LOG_METHODDEF \ + {"_emscripten_log", _PyCFunction_CAST(os__emscripten_log), METH_FASTCALL|METH_KEYWORDS, os__emscripten_log__doc__}, + +static PyObject * +os__emscripten_log_impl(PyObject *module, const char *arg); + +static PyObject * +os__emscripten_log(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(arg), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"arg", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_emscripten_log", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + const char *arg; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("_emscripten_log", "argument 'arg'", "str", args[0]); + goto exit; + } + Py_ssize_t 
arg_length; + arg = PyUnicode_AsUTF8AndSize(args[0], &arg_length); + if (arg == NULL) { + goto exit; + } + if (strlen(arg) != (size_t)arg_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } + return_value = os__emscripten_log_impl(module, arg); + +exit: + return return_value; +} + +#endif /* defined(__EMSCRIPTEN__) */ + #ifndef OS_TTYNAME_METHODDEF #define OS_TTYNAME_METHODDEF #endif /* !defined(OS_TTYNAME_METHODDEF) */ @@ -13440,4 +13514,8 @@ os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__EMSCRIPTEN_DEBUGGER_METHODDEF #define OS__EMSCRIPTEN_DEBUGGER_METHODDEF #endif /* !defined(OS__EMSCRIPTEN_DEBUGGER_METHODDEF) */ -/*[clinic end generated code: output=6cfddb3b77dc7a40 input=a9049054013a1b77]*/ + +#ifndef OS__EMSCRIPTEN_LOG_METHODDEF + #define OS__EMSCRIPTEN_LOG_METHODDEF +#endif /* !defined(OS__EMSCRIPTEN_LOG_METHODDEF) */ +/*[clinic end generated code: output=608e9bc5f631f688 input=a9049054013a1b77]*/ diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 142ff1a21316ab..0cb4b62d734550 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -23,6 +23,7 @@ #endif #include +#include "pycore_abstract.h" // _Py_convert_optional_to_ssize_t() #include "pycore_bytesobject.h" // _PyBytes_Find() #include "pycore_fileutils.h" // _Py_stat_struct #include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() @@ -529,7 +530,7 @@ mmap_read_method(PyObject *op, PyObject *args) mmap_object *self = mmap_object_CAST(op); CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "|n?:read", &num_bytes)) + if (!PyArg_ParseTuple(args, "|O&:read", _Py_convert_optional_to_ssize_t, &num_bytes)) return NULL; CHECK_VALID(NULL); @@ -1723,7 +1724,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) DWORD off_lo; /* lower 32 bits of offset */ DWORD size_hi; /* upper 32 bits of size */ DWORD size_lo; /* lower 32 bits of size */ - PyObject *tagname = NULL; + PyObject *tagname = Py_None; DWORD dwErr = 
0; int fileno; HANDLE fh = 0; @@ -1733,7 +1734,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) "tagname", "access", "offset", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|U?iL", keywords, + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|OiL", keywords, &fileno, &map_size, &tagname, &access, &offset)) { return NULL; @@ -1866,7 +1867,13 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) m_obj->weakreflist = NULL; m_obj->exports = 0; /* set the tag name */ - if (tagname != NULL) { + if (!Py_IsNone(tagname)) { + if (!PyUnicode_Check(tagname)) { + Py_DECREF(m_obj); + return PyErr_Format(PyExc_TypeError, "expected str or None for " + "'tagname', not %.200s", + Py_TYPE(tagname)->tp_name); + } m_obj->tagname = PyUnicode_AsWideCharString(tagname, NULL); if (m_obj->tagname == NULL) { Py_DECREF(m_obj); diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 47eaf5cd428a53..77622fbc4e8065 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -16971,6 +16971,25 @@ os__emscripten_debugger_impl(PyObject *module) emscripten_debugger(); Py_RETURN_NONE; } + +EM_JS(void, emscripten_log_impl_js, (const char* arg), { + console.warn(UTF8ToString(arg)); +}); + +/*[clinic input] +os._emscripten_log + arg: str + +Log something to the JS console. Emscripten only. 
+[clinic start generated code]*/ + +static PyObject * +os__emscripten_log_impl(PyObject *module, const char *arg) +/*[clinic end generated code: output=9749e5e293c42784 input=350aa1f70bc1e905]*/ +{ + emscripten_log_impl_js(arg); + Py_RETURN_NONE; +} #endif /* __EMSCRIPTEN__ */ @@ -17190,6 +17209,7 @@ static PyMethodDef posix_methods[] = { OS__IS_INPUTHOOK_INSTALLED_METHODDEF OS__CREATE_ENVIRON_METHODDEF OS__EMSCRIPTEN_DEBUGGER_METHODDEF + OS__EMSCRIPTEN_LOG_METHODDEF {NULL, NULL} /* Sentinel */ }; diff --git a/Python/emscripten_syscalls.c b/Python/emscripten_syscalls.c index d3eedad30e3639..98ee44276e53e0 100644 --- a/Python/emscripten_syscalls.c +++ b/Python/emscripten_syscalls.c @@ -1,4 +1,9 @@ #include "emscripten.h" +#include "stdio.h" + +// All system calls: return nonnegative number on success, return -errno on +// failure. Negative results get stored back into errno here: +// https://github.com/emscripten-core/emscripten/blob/main/system/lib/libc/musl/src/internal/syscall_ret.c#L7 // If we're running in node, report the UID of the user in the native system as // the UID of the user. Since the nodefs will report the uid correctly, if we @@ -40,7 +45,7 @@ int __syscall_umask(int mask) { #include #include -#undef errno +#include // Variant of EM_JS that does C preprocessor substitution on the body #define EM_JS_MACROS(ret, func_name, args, body...) 
\ @@ -100,7 +105,7 @@ EM_JS_MACROS(void, _emscripten_promising_main_js, (void), { return; } const origResolveGlobalSymbol = resolveGlobalSymbol; - if (!Module.onExit && globalThis?.process?.exit) { + if (ENVIRONMENT_IS_NODE && !Module.onExit) { Module.onExit = (code) => process.exit(code); } // * wrap the main symbol with WebAssembly.promising, @@ -115,7 +120,7 @@ EM_JS_MACROS(void, _emscripten_promising_main_js, (void), { orig.sym = (...args) => { (async () => { const ret = await main(...args); - process?.exit?.(ret); + Module.onExit?.(ret); })(); _emscripten_exit_with_live_runtime(); }; @@ -185,7 +190,7 @@ EM_JS_MACROS(__externref_t, __maybe_fd_read_async, ( if (e.name !== 'ErrnoError') { throw e; } - return e.errno; + return e["errno"]; } })(); }; @@ -199,16 +204,16 @@ __wasi_errno_t __wasi_fd_read_orig(__wasi_fd_t fd, const __wasi_iovec_t *iovs, // Take a promise that resolves to __wasi_errno_t and suspend until it resolves, // get the output. -EM_JS(__wasi_errno_t, __block_for_errno, (__externref_t p), { +EM_JS(int, __block_for_int, (__externref_t p), { return p; } if (WebAssembly.Suspending) { - __block_for_errno = new WebAssembly.Suspending(__block_for_errno); + __block_for_int = new WebAssembly.Suspending(__block_for_int); } ) // Replacement for fd_read syscall. Call __maybe_fd_read_async. If it returned -// null, delegate back to __wasi_fd_read_orig. Otherwise, use __block_for_errno +// null, delegate back to __wasi_fd_read_orig. Otherwise, use __block_for_int // to get the result. 
__wasi_errno_t __wasi_fd_read(__wasi_fd_t fd, const __wasi_iovec_t *iovs, size_t iovs_len, __wasi_size_t *nread) { @@ -216,6 +221,103 @@ __wasi_errno_t __wasi_fd_read(__wasi_fd_t fd, const __wasi_iovec_t *iovs, if (__builtin_wasm_ref_is_null_extern(p)) { return __wasi_fd_read_orig(fd, iovs, iovs_len, nread); } - __wasi_errno_t res = __block_for_errno(p); - return res; + return __block_for_int(p); +} + +#include +#define POLLFD_FD 0 +#define POLLFD_EVENTS 4 +#define POLLFD_REVENTS 6 +#define POLLFD_SIZE 8 +_Static_assert(offsetof(struct pollfd, fd) == 0, "Unepxected pollfd struct layout"); +_Static_assert(offsetof(struct pollfd, events) == 4, "Unepxected pollfd struct layout"); +_Static_assert(offsetof(struct pollfd, revents) == 6, "Unepxected pollfd struct layout"); +_Static_assert(sizeof(struct pollfd) == 8, "Unepxected pollfd struct layout"); + +EM_JS_MACROS(__externref_t, __maybe_poll_async, (intptr_t fds, int nfds, int timeout), { + if (!WebAssembly.promising) { + return null; + } + return (async function() { + try { + var nonzero = 0; + var promises = []; + for (var i = 0; i < nfds; i++) { + var pollfd = fds + POLLFD_SIZE * i; + var fd = HEAP32[(pollfd + POLLFD_FD)/4]; + var events = HEAP16[(pollfd + POLLFD_EVENTS)/2]; + var mask = POLLNVAL; + var stream = FS.getStream(fd); + if (stream) { + mask = POLLIN | POLLOUT; + if (stream.stream_ops.pollAsync) { + promises.push(stream.stream_ops.pollAsync(stream, timeout).then((mask) => { + mask &= events | POLLERR | POLLHUP; + HEAP16[(pollfd + POLLFD_REVENTS)/2] = mask; + if (mask) { + nonzero ++; + } + })); + } else if (stream.stream_ops.poll) { + var mask = stream.stream_ops.poll(stream, timeout); + mask &= events | POLLERR | POLLHUP; + HEAP16[(pollfd + POLLFD_REVENTS)/2] = mask; + if (mask) { + nonzero ++; + } + } + } + } + await Promise.all(promises); + return nonzero; + } catch(e) { + if (e?.name !== "ErrnoError") throw e; + return -e["errno"]; + } + })(); +}); + +// Bind original poll syscall to 
syscall_poll_orig(). +int syscall_poll_orig(intptr_t fds, int nfds, int timeout) + __attribute__((__import_module__("env"), + __import_name__("__syscall_poll"), __warn_unused_result__)); + +int __syscall_poll(intptr_t fds, int nfds, int timeout) { + __externref_t p = __maybe_poll_async(fds, nfds, timeout); + if (__builtin_wasm_ref_is_null_extern(p)) { + return syscall_poll_orig(fds, nfds, timeout); + } + return __block_for_int(p); +} + +#include + +int syscall_ioctl_orig(int fd, int request, void* varargs) + __attribute__((__import_module__("env"), + __import_name__("__syscall_ioctl"), __warn_unused_result__)); + +int __syscall_ioctl(int fd, int request, void* varargs) { + if (request == FIOCLEX || request == FIONCLEX) { + return 0; + } + if (request == FIONBIO) { + // Implement FIONBIO via fcntl. + // TODO: Upstream this. + int flags = fcntl(fd, F_GETFL, 0); + int nonblock = **((int**)varargs); + if (flags < 0) { + return -errno; + } + if (nonblock) { + flags |= O_NONBLOCK; + } else { + flags &= (~O_NONBLOCK); + } + int res = fcntl(fd, F_SETFL, flags); + if (res < 0) { + return -errno; + } + return res; + } + return syscall_ioctl_orig(fd, request, varargs); } diff --git a/Python/getargs.c b/Python/getargs.c index 02f5c0e37ee42a..c119ca5c35398b 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1,8 +1,6 @@ /* New getargs implementation */ -#include - #define PY_CXX_CONST const #include "Python.h" #include "pycore_abstract.h" // _PyNumber_Index() @@ -468,12 +466,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *format = *p_format; int i; Py_ssize_t len; - bool nullable = false; int istuple = PyTuple_Check(arg); int mustbetuple = istuple; - assert(*format == '('); - format++; for (;;) { int c = *format++; if (c == '(') { @@ -482,12 +477,8 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, level++; } else if (c == ')') { - if (level == 0) { - if (*format == '?') { - nullable = true; - } + 
if (level == 0) break; - } level--; } else if (c == ':' || c == ';' || c == '\0') @@ -524,13 +515,6 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } } - if (arg == Py_None && nullable) { - const char *msg = skipitem(p_format, p_va, flags); - if (msg != NULL) { - levels[0] = 0; - } - return msg; - } if (istuple) { /* fallthrough */ } @@ -539,10 +523,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, { levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, - "must be %d-item tuple%s, not %.50s", - n, - nullable ? " or None" : "", - arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); + "must be %d-item tuple, not %.50s", + n, + arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); return msgbuf; } else { @@ -579,7 +562,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, return msgbuf; } - format = *p_format + 1; + format = *p_format; for (i = 0; i < n; i++) { const char *msg; PyObject *item = PyTuple_GET_ITEM(arg, i); @@ -594,10 +577,6 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } } - format++; - if (*format == '?') { - format++; - } *p_format = format; if (!istuple) { Py_DECREF(arg); @@ -616,8 +595,11 @@ convertitem(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *format = *p_format; if (*format == '(' /* ')' */) { + format++; msg = converttuple(arg, &format, p_va, flags, levels, msgbuf, bufsize, freelist); + if (msg == NULL) + format++; } else { msg = convertsimple(arg, &format, p_va, flags, @@ -647,7 +629,7 @@ _PyArg_BadArgument(const char *fname, const char *displayname, } static const char * -converterr(bool nullable, const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) +converterr(const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) { assert(expected != NULL); assert(arg != NULL); @@ -657,23 +639,20 @@ converterr(bool nullable, const char *expected, PyObject *arg, char *msgbuf, siz } else { 
PyOS_snprintf(msgbuf, bufsize, - "must be %.50s%s, not %.50s", expected, - nullable ? " or None" : "", + "must be %.50s, not %.50s", expected, arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); } return msgbuf; } static const char * -convertcharerr(bool nullable, const char *expected, const char *what, Py_ssize_t size, +convertcharerr(const char *expected, const char *what, Py_ssize_t size, char *msgbuf, size_t bufsize) { assert(expected != NULL); PyOS_snprintf(msgbuf, bufsize, - "must be %.50s%s, not %.50s of length %zd", - expected, - nullable ? " or None" : "", - what, size); + "must be %.50s, not %.50s of length %zd", + expected, what, size); return msgbuf; } @@ -693,26 +672,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, char *msgbuf, size_t bufsize, freelist_t *freelist) { #define RETURN_ERR_OCCURRED return msgbuf -#define HANDLE_NULLABLE \ - if (*format == '?') { \ - format++; \ - if (arg == Py_None) { \ - break; \ - } \ - nullable = true; \ - } - const char *format = *p_format; char c = *format++; const char *sarg; - bool nullable = false; switch (c) { case 'b': { /* unsigned byte -- very short int */ unsigned char *p = va_arg(*p_va, unsigned char *); - HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -726,6 +694,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, "unsigned byte integer is greater than maximum"); RETURN_ERR_OCCURRED; } + else *p = (unsigned char) ival; break; } @@ -733,7 +702,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'B': {/* byte sized bitfield - both signed and unsigned values allowed */ unsigned char *p = va_arg(*p_va, unsigned char *); - HANDLE_NULLABLE; Py_ssize_t bytes = PyLong_AsNativeBytes(arg, p, sizeof(unsigned char), Py_ASNATIVEBYTES_NATIVE_ENDIAN | Py_ASNATIVEBYTES_ALLOW_INDEX | @@ -753,7 +721,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list 
*p_va, int flags, case 'h': {/* signed short int */ short *p = va_arg(*p_va, short *); - HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -775,7 +742,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'H': { /* short int sized bitfield, both signed and unsigned allowed */ unsigned short *p = va_arg(*p_va, unsigned short *); - HANDLE_NULLABLE; Py_ssize_t bytes = PyLong_AsNativeBytes(arg, p, sizeof(unsigned short), Py_ASNATIVEBYTES_NATIVE_ENDIAN | Py_ASNATIVEBYTES_ALLOW_INDEX | @@ -795,7 +761,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'i': {/* signed int */ int *p = va_arg(*p_va, int *); - HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -817,7 +782,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'I': { /* int sized bitfield, both signed and unsigned allowed */ unsigned int *p = va_arg(*p_va, unsigned int *); - HANDLE_NULLABLE; Py_ssize_t bytes = PyLong_AsNativeBytes(arg, p, sizeof(unsigned int), Py_ASNATIVEBYTES_NATIVE_ENDIAN | Py_ASNATIVEBYTES_ALLOW_INDEX | @@ -839,7 +803,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, { PyObject *iobj; Py_ssize_t *p = va_arg(*p_va, Py_ssize_t *); - HANDLE_NULLABLE; Py_ssize_t ival = -1; iobj = _PyNumber_Index(arg); if (iobj != NULL) { @@ -853,7 +816,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } case 'l': {/* long int */ long *p = va_arg(*p_va, long *); - HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -864,9 +826,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'k': { /* long sized bitfield */ unsigned long *p = va_arg(*p_va, unsigned long *); - HANDLE_NULLABLE; if (!PyIndex_Check(arg)) { - return converterr(nullable, 
"int", arg, msgbuf, bufsize); + return converterr("int", arg, msgbuf, bufsize); } Py_ssize_t bytes = PyLong_AsNativeBytes(arg, p, sizeof(unsigned long), Py_ASNATIVEBYTES_NATIVE_ENDIAN | @@ -887,7 +848,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'L': {/* long long */ long long *p = va_arg( *p_va, long long * ); - HANDLE_NULLABLE; long long ival = PyLong_AsLongLong(arg); if (ival == (long long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -898,9 +858,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'K': { /* long long sized bitfield */ unsigned long long *p = va_arg(*p_va, unsigned long long *); - HANDLE_NULLABLE; if (!PyIndex_Check(arg)) { - return converterr(nullable, "int", arg, msgbuf, bufsize); + return converterr("int", arg, msgbuf, bufsize); } Py_ssize_t bytes = PyLong_AsNativeBytes(arg, p, sizeof(unsigned long long), Py_ASNATIVEBYTES_NATIVE_ENDIAN | @@ -921,7 +880,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'f': {/* float */ float *p = va_arg(*p_va, float *); - HANDLE_NULLABLE; double dval = PyFloat_AsDouble(arg); if (dval == -1.0 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -932,7 +890,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'd': {/* double */ double *p = va_arg(*p_va, double *); - HANDLE_NULLABLE; double dval = PyFloat_AsDouble(arg); if (dval == -1.0 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -943,7 +900,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'D': {/* complex double */ Py_complex *p = va_arg(*p_va, Py_complex *); - HANDLE_NULLABLE; Py_complex cval; cval = PyComplex_AsCComplex(arg); if (PyErr_Occurred()) @@ -955,10 +911,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'c': {/* char */ char *p = va_arg(*p_va, char *); - HANDLE_NULLABLE; if (PyBytes_Check(arg)) { if (PyBytes_GET_SIZE(arg) != 1) { - 
return convertcharerr(nullable, "a byte string of length 1", + return convertcharerr("a byte string of length 1", "a bytes object", PyBytes_GET_SIZE(arg), msgbuf, bufsize); } @@ -966,28 +921,27 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else if (PyByteArray_Check(arg)) { if (PyByteArray_GET_SIZE(arg) != 1) { - return convertcharerr(nullable, "a byte string of length 1", + return convertcharerr("a byte string of length 1", "a bytearray object", PyByteArray_GET_SIZE(arg), msgbuf, bufsize); } *p = PyByteArray_AS_STRING(arg)[0]; } else - return converterr(nullable, "a byte string of length 1", arg, msgbuf, bufsize); + return converterr("a byte string of length 1", arg, msgbuf, bufsize); break; } case 'C': {/* unicode char */ int *p = va_arg(*p_va, int *); - HANDLE_NULLABLE; int kind; const void *data; if (!PyUnicode_Check(arg)) - return converterr(nullable, "a unicode character", arg, msgbuf, bufsize); + return converterr("a unicode character", arg, msgbuf, bufsize); if (PyUnicode_GET_LENGTH(arg) != 1) { - return convertcharerr(nullable, "a unicode character", + return convertcharerr("a unicode character", "a string", PyUnicode_GET_LENGTH(arg), msgbuf, bufsize); } @@ -1000,7 +954,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'p': {/* boolean *p*redicate */ int *p = va_arg(*p_va, int *); - HANDLE_NULLABLE; int val = PyObject_IsTrue(arg); if (val > 0) *p = 1; @@ -1019,31 +972,24 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *buf; Py_ssize_t count; if (*format == '*') { - format++; - HANDLE_NULLABLE; if (getbuffer(arg, (Py_buffer*)p, &buf) < 0) - return converterr(nullable, buf, arg, msgbuf, bufsize); + return converterr(buf, arg, msgbuf, bufsize); + format++; if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - nullable, "(cleanup problem)", + "(cleanup problem)", arg, msgbuf, bufsize); } break; } - else if (*format == '#') { + 
count = convertbuffer(arg, (const void **)p, &buf); + if (count < 0) + return converterr(buf, arg, msgbuf, bufsize); + if (*format == '#') { Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); - format++; - HANDLE_NULLABLE; - count = convertbuffer(arg, (const void **)p, &buf); - if (count < 0) - return converterr(nullable, buf, arg, msgbuf, bufsize); *psize = count; - } - else { - HANDLE_NULLABLE; - count = convertbuffer(arg, (const void **)p, &buf); - if (count < 0) - return converterr(nullable, buf, arg, msgbuf, bufsize); + format++; + } else { if (strlen(*p) != (size_t)count) { PyErr_SetString(PyExc_ValueError, "embedded null byte"); RETURN_ERR_OCCURRED; @@ -1059,35 +1005,32 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, /* "s*" or "z*" */ Py_buffer *p = (Py_buffer *)va_arg(*p_va, Py_buffer *); - format++; - HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) PyBuffer_FillInfo(p, NULL, NULL, 0, 1, 0); else if (PyUnicode_Check(arg)) { Py_ssize_t len; sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(nullable, CONV_UNICODE, + return converterr(CONV_UNICODE, arg, msgbuf, bufsize); PyBuffer_FillInfo(p, arg, (void *)sarg, len, 1, 0); } else { /* any bytes-like object */ const char *buf; if (getbuffer(arg, p, &buf) < 0) - return converterr(nullable, buf, arg, msgbuf, bufsize); + return converterr(buf, arg, msgbuf, bufsize); } if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - nullable, "(cleanup problem)", + "(cleanup problem)", arg, msgbuf, bufsize); } + format++; } else if (*format == '#') { /* a string or read-only bytes-like object */ /* "s#" or "z#" */ const void **p = (const void **)va_arg(*p_va, const char **); Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); - format++; - HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) { *p = NULL; *psize = 0; @@ -1096,7 +1039,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, Py_ssize_t len; sarg = 
PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(nullable, CONV_UNICODE, + return converterr(CONV_UNICODE, arg, msgbuf, bufsize); *p = sarg; *psize = len; @@ -1106,22 +1049,22 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *buf; Py_ssize_t count = convertbuffer(arg, p, &buf); if (count < 0) - return converterr(nullable, buf, arg, msgbuf, bufsize); + return converterr(buf, arg, msgbuf, bufsize); *psize = count; } + format++; } else { /* "s" or "z" */ const char **p = va_arg(*p_va, const char **); Py_ssize_t len; sarg = NULL; - HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) *p = NULL; else if (PyUnicode_Check(arg)) { sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(nullable, CONV_UNICODE, + return converterr(CONV_UNICODE, arg, msgbuf, bufsize); if (strlen(sarg) != (size_t)len) { PyErr_SetString(PyExc_ValueError, "embedded null character"); @@ -1130,7 +1073,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, *p = sarg; } else - return converterr(c == 'z' || nullable, "str", + return converterr(c == 'z' ? 
"str or None" : "str", arg, msgbuf, bufsize); } break; @@ -1159,46 +1102,13 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, recode_strings = 0; else return converterr( - nullable, "(unknown parser marker combination)", + "(unknown parser marker combination)", arg, msgbuf, bufsize); buffer = (char **)va_arg(*p_va, char **); format++; if (buffer == NULL) - return converterr(nullable, "(buffer is NULL)", + return converterr("(buffer is NULL)", arg, msgbuf, bufsize); - Py_ssize_t *psize = NULL; - if (*format == '#') { - /* Using buffer length parameter '#': - - - if *buffer is NULL, a new buffer of the - needed size is allocated and the data - copied into it; *buffer is updated to point - to the new buffer; the caller is - responsible for PyMem_Free()ing it after - usage - - - if *buffer is not NULL, the data is - copied to *buffer; *buffer_len has to be - set to the size of the buffer on input; - buffer overflow is signalled with an error; - buffer has to provide enough room for the - encoded string plus the trailing 0-byte - - - in both cases, *buffer_len is updated to - the size of the buffer /excluding/ the - trailing 0-byte - - */ - psize = va_arg(*p_va, Py_ssize_t*); - - format++; - if (psize == NULL) { - return converterr( - nullable, "(buffer_len is NULL)", - arg, msgbuf, bufsize); - } - } - HANDLE_NULLABLE; /* Encode object */ if (!recode_strings && @@ -1219,7 +1129,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, encoding, NULL); if (s == NULL) - return converterr(nullable, "(encoding failed)", + return converterr("(encoding failed)", arg, msgbuf, bufsize); assert(PyBytes_Check(s)); size = PyBytes_GET_SIZE(s); @@ -1229,15 +1139,42 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else { return converterr( - nullable, - recode_strings ? "str" - : nullable ? "str, bytes, bytearray" - : "str, bytes or bytearray", + recode_strings ? 
"str" : "str, bytes or bytearray", arg, msgbuf, bufsize); } /* Write output; output is guaranteed to be 0-terminated */ - if (psize != NULL) { + if (*format == '#') { + /* Using buffer length parameter '#': + + - if *buffer is NULL, a new buffer of the + needed size is allocated and the data + copied into it; *buffer is updated to point + to the new buffer; the caller is + responsible for PyMem_Free()ing it after + usage + + - if *buffer is not NULL, the data is + copied to *buffer; *buffer_len has to be + set to the size of the buffer on input; + buffer overflow is signalled with an error; + buffer has to provide enough room for the + encoded string plus the trailing 0-byte + + - in both cases, *buffer_len is updated to + the size of the buffer /excluding/ the + trailing 0-byte + + */ + Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); + + format++; + if (psize == NULL) { + Py_DECREF(s); + return converterr( + "(buffer_len is NULL)", + arg, msgbuf, bufsize); + } if (*buffer == NULL) { *buffer = PyMem_NEW(char, size + 1); if (*buffer == NULL) { @@ -1248,7 +1185,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); return converterr( - nullable, "(cleanup problem)", + "(cleanup problem)", arg, msgbuf, bufsize); } } else { @@ -1282,7 +1219,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if ((Py_ssize_t)strlen(ptr) != size) { Py_DECREF(s); return converterr( - nullable, "encoded string without null bytes", + "encoded string without null bytes", arg, msgbuf, bufsize); } *buffer = PyMem_NEW(char, size + 1); @@ -1293,7 +1230,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); - return converterr(nullable, "(cleanup problem)", + return converterr("(cleanup problem)", arg, msgbuf, bufsize); } memcpy(*buffer, ptr, size+1); @@ -1304,32 +1241,29 @@ 
convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'S': { /* PyBytes object */ PyObject **p = va_arg(*p_va, PyObject **); - HANDLE_NULLABLE; if (PyBytes_Check(arg)) *p = arg; else - return converterr(nullable, "bytes", arg, msgbuf, bufsize); + return converterr("bytes", arg, msgbuf, bufsize); break; } case 'Y': { /* PyByteArray object */ PyObject **p = va_arg(*p_va, PyObject **); - HANDLE_NULLABLE; if (PyByteArray_Check(arg)) *p = arg; else - return converterr(nullable, "bytearray", arg, msgbuf, bufsize); + return converterr("bytearray", arg, msgbuf, bufsize); break; } case 'U': { /* PyUnicode object */ PyObject **p = va_arg(*p_va, PyObject **); - HANDLE_NULLABLE; if (PyUnicode_Check(arg)) { *p = arg; } else - return converterr(nullable, "str", arg, msgbuf, bufsize); + return converterr("str", arg, msgbuf, bufsize); break; } @@ -1340,11 +1274,10 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, type = va_arg(*p_va, PyTypeObject*); p = va_arg(*p_va, PyObject **); format++; - HANDLE_NULLABLE; if (PyType_IsSubtype(Py_TYPE(arg), type)) *p = arg; else - return converterr(nullable, type->tp_name, arg, msgbuf, bufsize); + return converterr(type->tp_name, arg, msgbuf, bufsize); } else if (*format == '&') { @@ -1353,18 +1286,16 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, void *addr = va_arg(*p_va, void *); int res; format++; - HANDLE_NULLABLE; if (! 
(res = (*convert)(arg, addr))) - return converterr(nullable, "(unspecified)", + return converterr("(unspecified)", arg, msgbuf, bufsize); if (res == Py_CLEANUP_SUPPORTED && addcleanup(addr, freelist, convert) == -1) - return converterr(nullable, "(cleanup problem)", + return converterr("(cleanup problem)", arg, msgbuf, bufsize); } else { p = va_arg(*p_va, PyObject **); - HANDLE_NULLABLE; *p = arg; } break; @@ -1376,30 +1307,29 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (*format != '*') return converterr( - nullable, "(invalid use of 'w' format character)", + "(invalid use of 'w' format character)", arg, msgbuf, bufsize); format++; - HANDLE_NULLABLE; /* Caller is interested in Py_buffer, and the object supports it directly. The request implicitly asks for PyBUF_SIMPLE, so the result is C-contiguous with format 'B'. */ if (PyObject_GetBuffer(arg, (Py_buffer*)p, PyBUF_WRITABLE) < 0) { PyErr_Clear(); - return converterr(nullable, "read-write bytes-like object", + return converterr("read-write bytes-like object", arg, msgbuf, bufsize); } assert(PyBuffer_IsContiguous((Py_buffer *)p, 'C')); if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - nullable, "(cleanup problem)", + "(cleanup problem)", arg, msgbuf, bufsize); } break; } default: - return converterr(nullable, "(impossible)", arg, msgbuf, bufsize); + return converterr("(impossible)", arg, msgbuf, bufsize); } @@ -2794,9 +2724,6 @@ skipitem(const char **p_format, va_list *p_va, int flags) return "impossible"; } - if (*format == '?') { - format++; - } *p_format = format; return NULL; diff --git a/Tools/wasm/emscripten/config.site-wasm32-emscripten b/Tools/wasm/emscripten/config.site-wasm32-emscripten index 8c3a338dacb2dc..9f98e3f3c3bb1f 100644 --- a/Tools/wasm/emscripten/config.site-wasm32-emscripten +++ b/Tools/wasm/emscripten/config.site-wasm32-emscripten @@ -69,7 +69,6 @@ ac_cv_func_posix_fallocate=no # Syscalls that resulted in a segfault 
ac_cv_func_utimensat=no -ac_cv_header_sys_ioctl_h=no # sockets are supported, but only AF_INET / AF_INET6 in non-blocking mode. # Disable AF_UNIX and AF_PACKET support, see socketmodule.h. diff --git a/Tools/wasm/emscripten/web_example/wasm_assets.py b/Tools/wasm/emscripten/wasm_assets.py similarity index 98% rename from Tools/wasm/emscripten/web_example/wasm_assets.py rename to Tools/wasm/emscripten/wasm_assets.py index deeb9229a4412b..b08e7ce1114a4a 100755 --- a/Tools/wasm/emscripten/web_example/wasm_assets.py +++ b/Tools/wasm/emscripten/wasm_assets.py @@ -18,7 +18,7 @@ from typing import Dict # source directory -SRCDIR = pathlib.Path(__file__).parents[4].absolute() +SRCDIR = pathlib.Path(__file__).parents[3].absolute() SRCDIR_LIB = SRCDIR / "Lib" @@ -84,7 +84,6 @@ "_json": ["json/"], "_multiprocessing": ["concurrent/futures/process.py", "multiprocessing/"], "pyexpat": ["xml/", "xmlrpc/"], - "readline": ["rlcompleter.py"], "_sqlite3": ["sqlite3/"], "_ssl": ["ssl.py"], "_tkinter": ["idlelib/", "tkinter/", "turtle.py", "turtledemo/"], diff --git a/Tools/wasm/emscripten/web_example_pyrepl_jspi/index.html b/Tools/wasm/emscripten/web_example_pyrepl_jspi/index.html new file mode 100644 index 00000000000000..1f72bd24e79a04 --- /dev/null +++ b/Tools/wasm/emscripten/web_example_pyrepl_jspi/index.html @@ -0,0 +1,34 @@ + + + + + + + +
+ + + diff --git a/Tools/wasm/emscripten/web_example_pyrepl_jspi/src.mjs b/Tools/wasm/emscripten/web_example_pyrepl_jspi/src.mjs new file mode 100644 index 00000000000000..5642372c9d2472 --- /dev/null +++ b/Tools/wasm/emscripten/web_example_pyrepl_jspi/src.mjs @@ -0,0 +1,194 @@ +// Much of this is adapted from here: +// https://github.com/mame/xterm-pty/blob/main/emscripten-pty.js +// Thanks to xterm-pty for making this possible! + +import createEmscriptenModule from "./python.mjs"; +import { openpty } from "https://unpkg.com/xterm-pty/index.mjs"; +import "https://unpkg.com/@xterm/xterm/lib/xterm.js"; + +var term = new Terminal(); +term.open(document.getElementById("terminal")); +const { master, slave: PTY } = openpty(); +term.loadAddon(master); +globalThis.PTY = PTY; + +async function setupStdlib(Module) { + const versionInt = Module.HEAPU32[Module._Py_Version >>> 2]; + const major = (versionInt >>> 24) & 0xff; + const minor = (versionInt >>> 16) & 0xff; + // Prevent complaints about not finding exec-prefix by making a lib-dynload directory + Module.FS.mkdirTree(`/lib/python${major}.${minor}/lib-dynload/`); + const resp = await fetch(`python${major}.${minor}.zip`); + const stdlibBuffer = await resp.arrayBuffer(); + Module.FS.writeFile( + `/lib/python${major}${minor}.zip`, + new Uint8Array(stdlibBuffer), + { canOwn: true }, + ); +} + +const tty_ops = { + ioctl_tcgets: () => { + const termios = PTY.ioctl("TCGETS"); + const data = { + c_iflag: termios.iflag, + c_oflag: termios.oflag, + c_cflag: termios.cflag, + c_lflag: termios.lflag, + c_cc: termios.cc, + }; + return data; + }, + + ioctl_tcsets: (_tty, _optional_actions, data) => { + PTY.ioctl("TCSETS", { + iflag: data.c_iflag, + oflag: data.c_oflag, + cflag: data.c_cflag, + lflag: data.c_lflag, + cc: data.c_cc, + }); + return 0; + }, + + ioctl_tiocgwinsz: () => PTY.ioctl("TIOCGWINSZ").reverse(), + + get_char: () => { + throw new Error("Should not happen"); + }, + put_char: () => { + throw new Error("Should not 
happen"); + }, + + fsync: () => {}, +}; + +const POLLIN = 1; +const POLLOUT = 4; + +const waitResult = { + READY: 0, + SIGNAL: 1, + TIMEOUT: 2, +}; + +function onReadable() { + var handle; + var promise = new Promise((resolve) => { + handle = PTY.onReadable(() => resolve(waitResult.READY)); + }); + return [promise, handle]; +} + +function onSignal() { + // TODO: signal handling + var handle = { dispose() {} }; + var promise = new Promise((resolve) => {}); + return [promise, handle]; +} + +function onTimeout(timeout) { + var id; + var promise = new Promise((resolve) => { + if (timeout > 0) { + id = setTimeout(resolve, timeout, waitResult.TIMEOUT); + } + }); + var handle = { + dispose() { + if (id) { + clearTimeout(id); + } + }, + }; + return [promise, handle]; +} + +async function waitForReadable(timeout) { + let p1, p2, p3; + let h1, h2, h3; + try { + [p1, h1] = onReadable(); + [p2, h2] = onTimeout(timeout); + [p3, h3] = onSignal(); + return await Promise.race([p1, p2, p3]); + } finally { + h1.dispose(); + h2.dispose(); + h3.dispose(); + } +} + +const FIONREAD = 0x541b; + +const tty_stream_ops = { + async readAsync(stream, buffer, offset, length, pos /* ignored */) { + let readBytes = PTY.read(length); + if (length && !readBytes.length) { + const status = await waitForReadable(-1); + if (status === waitResult.READY) { + readBytes = PTY.read(length); + } else { + throw new Error("Not implemented"); + } + } + buffer.set(readBytes, offset); + return readBytes.length; + }, + + write: (stream, buffer, offset, length) => { + // Note: default `buffer` is for some reason `HEAP8` (signed), while we want unsigned `HEAPU8`. + buffer = new Uint8Array( + buffer.buffer, + buffer.byteOffset, + buffer.byteLength, + ); + const toWrite = Array.from(buffer.subarray(offset, offset + length)); + PTY.write(toWrite); + return length; + }, + + async pollAsync(stream, timeout) { + if (!PTY.readable && timeout) { + await waitForReadable(timeout); + } + return (PTY.readable ? 
POLLIN : 0) | (PTY.writable ? POLLOUT : 0); + }, + ioctl(stream, request, varargs) { + if (request === FIONREAD) { + const res = PTY.fromLdiscToUpperBuffer.length; + Module.HEAPU32[varargs / 4] = res; + return 0; + } + throw new Error("Unimplemented ioctl request"); + }, +}; + +async function setupStdio(Module) { + Object.assign(Module.TTY.default_tty_ops, tty_ops); + Object.assign(Module.TTY.stream_ops, tty_stream_ops); +} + +const emscriptenSettings = { + async preRun(Module) { + Module.addRunDependency("pre-run"); + Module.ENV.TERM = "xterm-256color"; + // Uncomment next line to turn on tracing (messages go to browser console). + // Module.ENV.PYREPL_TRACE = "1"; + + // Leak module so we can try to show traceback if we crash on startup + globalThis.Module = Module; + await Promise.all([setupStdlib(Module), setupStdio(Module)]); + Module.removeRunDependency("pre-run"); + }, +}; + +try { + await createEmscriptenModule(emscriptenSettings); +} catch (e) { + // Show JavaScript exception and traceback + console.warn(e); + // Show Python exception and traceback + Module.__Py_DumpTraceback(2, Module._PyGILState_GetThisThreadState()); + process.exit(1); +} diff --git a/configure b/configure index ef47f9b0df73a8..8db2e9c46abba2 100755 --- a/configure +++ b/configure @@ -9603,7 +9603,7 @@ fi as_fn_append LDFLAGS_NODIST " -sWASM_BIGINT" as_fn_append LINKFORSHARED " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" - as_fn_append LINKFORSHARED " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32" + as_fn_append LINKFORSHARED " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32,TTY" as_fn_append LINKFORSHARED " -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,__PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET,_PyGILState_GetThisThreadState,__Py_DumpTraceback" as_fn_append LINKFORSHARED " -sSTACK_SIZE=5MB" as_fn_append LINKFORSHARED " -sTEXTDECODER=2" @@ -31180,9 +31180,7 @@ case $ac_sys_system in #( - py_cv_module_fcntl=n/a py_cv_module_readline=n/a - 
py_cv_module_termios=n/a py_cv_module_=n/a ;; #( diff --git a/configure.ac b/configure.ac index 23ed9cd35bc94b..c839dd65a5fc5a 100644 --- a/configure.ac +++ b/configure.ac @@ -2335,7 +2335,7 @@ AS_CASE([$ac_sys_system], dnl Include file system support AS_VAR_APPEND([LINKFORSHARED], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) - AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV,HEAPU32,TTY"]) AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version,__PyRuntime,__PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET,_PyGILState_GetThisThreadState,__Py_DumpTraceback"]) AS_VAR_APPEND([LINKFORSHARED], [" -sSTACK_SIZE=5MB"]) dnl Avoid bugs in JS fallback string decoding path @@ -7768,9 +7768,7 @@ AS_CASE([$ac_sys_system], ) dnl fcntl, readline, and termios are not particularly useful in browsers. PY_STDLIB_MOD_SET_NA( - [fcntl], [readline], - [termios], ) ], [WASI], [