diff --git a/Doc/library/contextlib.rst b/Doc/library/contextlib.rst index f5b349441bcfee..e8f264f949807d 100644 --- a/Doc/library/contextlib.rst +++ b/Doc/library/contextlib.rst @@ -151,9 +151,9 @@ Functions and classes provided: created by :func:`asynccontextmanager` to meet the requirement that context managers support multiple invocations in order to be used as decorators. - .. versionchanged:: 3.10 - Async context managers created with :func:`asynccontextmanager` can - be used as decorators. + .. versionchanged:: 3.10 + Async context managers created with :func:`asynccontextmanager` can + be used as decorators. .. function:: closing(thing) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 5de4aa05398b55..272a0834cbfe24 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -17,26 +17,26 @@ value returned by this function is always 1 more as the function also has a refe to the object when called): ```pycon - >>> x = object() - >>> sys.getrefcount(x) - 2 - >>> y = x - >>> sys.getrefcount(x) - 3 - >>> del y - >>> sys.getrefcount(x) - 2 +>>> x = object() +>>> sys.getrefcount(x) +2 +>>> y = x +>>> sys.getrefcount(x) +3 +>>> del y +>>> sys.getrefcount(x) +2 ``` The main problem with the reference counting scheme is that it does not handle reference cycles. For instance, consider this code: ```pycon - >>> container = [] - >>> container.append(container) - >>> sys.getrefcount(container) - 3 - >>> del container +>>> container = [] +>>> container.append(container) +>>> sys.getrefcount(container) +3 +>>> del container ``` In this example, `container` holds a reference to itself, so even when we remove @@ -199,26 +199,26 @@ variable `A`, and one self-referencing object which is completely unreachable: ```pycon - >>> import gc - - >>> class Link: - ... def __init__(self, next_link=None): - ... self.next_link = next_link - - >>> link_3 = Link() - >>> link_2 = Link(link_3) - >>> link_1 = Link(link_2) - >>> link_3.next_link = link_1 - >>> A = link_1 - >>> del link_1, link_2, link_3 - - >>> link_4 = Link() - >>> link_4.next_link = link_4 - >>> del link_4 - - # Collect the unreachable Link object (and its .__dict__ dict). - >>> gc.collect() - 2 +>>> import gc +>>> +>>> class Link: +... def __init__(self, next_link=None): +... self.next_link = next_link +... +>>> link_3 = Link() +>>> link_2 = Link(link_3) +>>> link_1 = Link(link_2) +>>> link_3.next_link = link_1 +>>> A = link_1 +>>> del link_1, link_2, link_3 +>>> +>>> link_4 = Link() +>>> link_4.next_link = link_4 +>>> del link_4 +>>> +>>> # Collect the unreachable Link object (and its .__dict__ dict). +>>> gc.collect() +2 ``` The GC starts with a set of candidate objects it wants to scan. In the @@ -439,9 +439,9 @@ These thresholds can be examined using the function: ```pycon - >>> import gc - >>> gc.get_threshold() - (700, 10, 10) +>>> import gc +>>> gc.get_threshold() +(700, 10, 10) ``` The content of these generations can be examined using the @@ -449,38 +449,32 @@ The content of these generations can be examined using the specifically in a generation by calling `gc.collect(generation=NUM)`. ```pycon - >>> import gc - >>> class MyObj: - ... pass - ... - - # Move everything to the old generation so it's easier to inspect - # the young generation. - - >>> gc.collect() - 0 - - # Create a reference cycle. - - >>> x = MyObj() - >>> x.self = x - - # Initially the object is in the young generation. - - >>> gc.get_objects(generation=0) - [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] - - # After a collection of the youngest generation the object - # moves to the old generation. - - >>> gc.collect(generation=0) - 0 - >>> gc.get_objects(generation=0) - [] - >>> gc.get_objects(generation=1) - [] - >>> gc.get_objects(generation=2) - [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] +>>> import gc +>>> class MyObj: +... pass +... +>>> # Move everything to the old generation so it's easier to inspect +>>> # the young generation. +>>> gc.collect() +0 +>>> # Create a reference cycle. +>>> x = MyObj() +>>> x.self = x +>>> +>>> # Initially the object is in the young generation. +>>> gc.get_objects(generation=0) +[..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] +>>> +>>> # After a collection of the youngest generation the object +>>> # moves to the old generation. +>>> gc.collect(generation=0) +0 +>>> gc.get_objects(generation=0) +[] +>>> gc.get_objects(generation=1) +[] +>>> gc.get_objects(generation=2) +[..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] ``` @@ -563,18 +557,18 @@ the current tracking status of the object. Subsequent garbage collections may ch tracking status of the object. ```pycon - >>> gc.is_tracked(0) - False - >>> gc.is_tracked("a") - False - >>> gc.is_tracked([]) - True - >>> gc.is_tracked(()) - False - >>> gc.is_tracked({}) - True - >>> gc.is_tracked({"a": 1}) - True +>>> gc.is_tracked(0) +False +>>> gc.is_tracked("a") +False +>>> gc.is_tracked([]) +True +>>> gc.is_tracked(()) +False +>>> gc.is_tracked({}) +True +>>> gc.is_tracked({"a": 1}) +True ``` Differences between GC implementations diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index fb6d268e5869f4..b666533466e578 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1196,11 +1196,11 @@ def f(): except RuntimeError: print('ok') else: - print('skip') + print('!skip!') """ _, out, err = assert_python_ok("-u", "-c", code) out = out.strip() - if out == b'skip': + if b'!skip!' in out: self.skipTest('RLIMIT_NPROC had no effect; probably superuser') self.assertEqual(out, b'ok') self.assertEqual(err, b'') diff --git a/Lib/test/test_winconsoleio.py b/Lib/test/test_winconsoleio.py index a10d63dfdc9753..d9076e77c158a2 100644 --- a/Lib/test/test_winconsoleio.py +++ b/Lib/test/test_winconsoleio.py @@ -142,6 +142,29 @@ def test_write_empty_data(self): with ConIO('CONOUT$', 'w') as f: self.assertEqual(f.write(b''), 0) + @requires_resource('console') + def test_write(self): + testcases = [] + with ConIO('CONOUT$', 'w') as f: + for a in [ + b'', + b'abc', + b'\xc2\xa7\xe2\x98\x83\xf0\x9f\x90\x8d', + b'\xff'*10, + ]: + for b in b'\xc2\xa7', b'\xe2\x98\x83', b'\xf0\x9f\x90\x8d': + testcases.append(a + b) + for i in range(1, len(b)): + data = a + b[:i] + testcases.append(data + b'z') + testcases.append(data + b'\xff') + # incomplete multibyte sequence + with self.subTest(data=data): + self.assertEqual(f.write(data), len(a)) + for data in testcases: + with self.subTest(data=data): + self.assertEqual(f.write(data), len(data)) + def assertStdinRoundTrip(self, text): stdin = open('CONIN$', 'r') old_stdin = sys.stdin diff --git a/Misc/NEWS.d/next/Library/2024-09-13-18-24-27.gh-issue-124008.XaiPQx.rst b/Misc/NEWS.d/next/Library/2024-09-13-18-24-27.gh-issue-124008.XaiPQx.rst new file mode 100644 index 00000000000000..cd6dd9a7a97e90 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-13-18-24-27.gh-issue-124008.XaiPQx.rst @@ -0,0 +1,2 @@ +Fix possible crash (in debug build), incorrect output or returning incorrect +value from raw binary ``write()`` when writing to console on Windows. diff --git a/Misc/NEWS.d/next/Library/2024-11-27-14-06-35.gh-issue-123967.wxUmnW.rst b/Misc/NEWS.d/next/Library/2024-11-27-14-06-35.gh-issue-123967.wxUmnW.rst new file mode 100644 index 00000000000000..788fe0c78ef257 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-27-14-06-35.gh-issue-123967.wxUmnW.rst @@ -0,0 +1,2 @@ +Fix faulthandler for trampoline frames. If the top-most frame is a +trampoline frame, skip it. Patch by Victor Stinner. diff --git a/Modules/_io/winconsoleio.c b/Modules/_io/winconsoleio.c index d7cb5abfdc0abd..3fa0301e337991 100644 --- a/Modules/_io/winconsoleio.c +++ b/Modules/_io/winconsoleio.c @@ -135,19 +135,67 @@ char _PyIO_get_console_type(PyObject *path_or_fd) { } static DWORD -_find_last_utf8_boundary(const char *buf, DWORD len) +_find_last_utf8_boundary(const unsigned char *buf, DWORD len) { - /* This function never returns 0, returns the original len instead */ - DWORD count = 1; - if (len == 0 || (buf[len - 1] & 0x80) == 0) { - return len; - } - for (;; count++) { - if (count > 3 || count >= len) { + for (DWORD count = 1; count < 4 && count <= len; count++) { + unsigned char c = buf[len - count]; + if (c < 0x80) { + /* No starting byte found. */ return len; } - if ((buf[len - count] & 0xc0) != 0x80) { - return len - count; + if (c >= 0xc0) { + if (c < 0xe0 /* 2-bytes sequence */ ? count < 2 : + c < 0xf0 /* 3-bytes sequence */ ? count < 3 : + c < 0xf8 /* 4-bytes sequence */) + { + /* Incomplete multibyte sequence. */ + return len - count; + } + /* Either complete or invalid sequence. */ + return len; + } + } + /* Either complete 4-bytes sequence or invalid sequence. */ + return len; +} + +/* Find the number of UTF-8 bytes that corresponds to the specified number of + * wchars. + * I.e. find x <= len so that MultiByteToWideChar(CP_UTF8, 0, s, x, NULL, 0) == n. + * + * WideCharToMultiByte() cannot be used for this, because the UTF-8 -> wchar + * conversion is not reversible (invalid UTF-8 byte produces \ufffd which + * will be converted back to 3-bytes UTF-8 sequence \xef\xbf\xbd). + * So we need to use binary search. + */ +static DWORD +_wchar_to_utf8_count(const unsigned char *s, DWORD len, DWORD n) +{ + DWORD start = 0; + while (1) { + DWORD mid = 0; + for (DWORD i = len / 2; i <= len; i++) { + mid = _find_last_utf8_boundary(s, i); + if (mid != 0) { + break; + } + /* The middle could split the first multibytes sequence. */ + } + if (mid == len) { + return start + len; + } + if (mid == 0) { + mid = len > 1 ? len - 1 : 1; + } + DWORD wlen = MultiByteToWideChar(CP_UTF8, 0, s, mid, NULL, 0); + if (wlen <= n) { + s += mid; + start += mid; + len -= mid; + n -= wlen; + } + else { + len = mid; } } } @@ -563,8 +611,10 @@ read_console_w(HANDLE handle, DWORD maxlen, DWORD *readlen) { int err = 0, sig = 0; wchar_t *buf = (wchar_t*)PyMem_Malloc(maxlen * sizeof(wchar_t)); - if (!buf) + if (!buf) { + PyErr_NoMemory(); goto error; + } *readlen = 0; @@ -622,6 +672,7 @@ read_console_w(HANDLE handle, DWORD maxlen, DWORD *readlen) { Py_UNBLOCK_THREADS if (!newbuf) { sig = -1; + PyErr_NoMemory(); break; } buf = newbuf; @@ -645,8 +696,10 @@ read_console_w(HANDLE handle, DWORD maxlen, DWORD *readlen) { if (*readlen > 0 && buf[0] == L'\x1a') { PyMem_Free(buf); buf = (wchar_t *)PyMem_Malloc(sizeof(wchar_t)); - if (!buf) + if (!buf) { + PyErr_NoMemory(); goto error; + } buf[0] = L'\0'; *readlen = 0; } @@ -824,8 +877,10 @@ _io__WindowsConsoleIO_readall_impl(winconsoleio *self) bufsize = BUFSIZ; buf = (wchar_t*)PyMem_Malloc((bufsize + 1) * sizeof(wchar_t)); - if (buf == NULL) + if (buf == NULL) { + PyErr_NoMemory(); return NULL; + } while (1) { wchar_t *subbuf; @@ -847,6 +902,7 @@ _io__WindowsConsoleIO_readall_impl(winconsoleio *self) (bufsize + 1) * sizeof(wchar_t)); if (tmp == NULL) { PyMem_Free(buf); + PyErr_NoMemory(); return NULL; } buf = tmp; @@ -1022,43 +1078,49 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, PyTypeObject *cls, len = (DWORD)b->len; Py_BEGIN_ALLOW_THREADS - wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, NULL, 0); - /* issue11395 there is an unspecified upper bound on how many bytes can be written at once. We cap at 32k - the caller will have to handle partial writes. Since we don't know how many input bytes are being ignored, we have to reduce and recalculate. */ - while (wlen > 32766 / sizeof(wchar_t)) { - len /= 2; + const DWORD max_wlen = 32766 / sizeof(wchar_t); + /* UTF-8 to wchar ratio is at most 3:1. */ + len = Py_MIN(len, max_wlen * 3); + while (1) { /* Fix for github issues gh-110913 and gh-82052. */ len = _find_last_utf8_boundary(b->buf, len); wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, NULL, 0); + if (wlen <= max_wlen) { + break; + } + len /= 2; } Py_END_ALLOW_THREADS - if (!wlen) - return PyErr_SetFromWindowsErr(0); + if (!wlen) { + return PyLong_FromLong(0); + } wbuf = (wchar_t*)PyMem_Malloc(wlen * sizeof(wchar_t)); + if (!wbuf) { + PyErr_NoMemory(); + return NULL; + } Py_BEGIN_ALLOW_THREADS wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, wbuf, wlen); if (wlen) { res = WriteConsoleW(handle, wbuf, wlen, &n, NULL); +#ifdef Py_DEBUG + if (res) { +#else if (res && n < wlen) { +#endif /* Wrote fewer characters than expected, which means our * len value may be wrong. So recalculate it from the - * characters that were written. As this could potentially - * result in a different value, we also validate that value. + * characters that were written. */ - len = WideCharToMultiByte(CP_UTF8, 0, wbuf, n, - NULL, 0, NULL, NULL); - if (len) { - wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, - NULL, 0); - assert(wlen == len); - } + len = _wchar_to_utf8_count(b->buf, len, n); } } else res = 0; diff --git a/Python/traceback.c b/Python/traceback.c index 47b77c9108dd9a..e819909b6045c3 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -890,6 +890,8 @@ _Py_DumpASCII(int fd, PyObject *text) static void dump_frame(int fd, _PyInterpreterFrame *frame) { + assert(frame->owner != FRAME_OWNED_BY_CSTACK); + PyCodeObject *code =_PyFrame_GetCode(frame); PUTS(fd, " File "); if (code->co_filename != NULL @@ -963,6 +965,17 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header) unsigned int depth = 0; while (1) { + if (frame->owner == FRAME_OWNED_BY_CSTACK) { + /* Trampoline frame */ + frame = frame->previous; + if (frame == NULL) { + break; + } + + /* Can't have more than one shim frame in a row */ + assert(frame->owner != FRAME_OWNED_BY_CSTACK); + } + if (MAX_FRAME_DEPTH <= depth) { if (MAX_FRAME_DEPTH < depth) { PUTS(fd, "plus "); @@ -971,20 +984,12 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header) } break; } + dump_frame(fd, frame); frame = frame->previous; if (frame == NULL) { break; } - if (frame->owner == FRAME_OWNED_BY_CSTACK) { - /* Trampoline frame */ - frame = frame->previous; - } - if (frame == NULL) { - break; - } - /* Can't have more than one shim frame in a row */ - assert(frame->owner != FRAME_OWNED_BY_CSTACK); depth++; } }