diff --git a/Doc/library/contextlib.rst b/Doc/library/contextlib.rst
index f5b349441bcfee..e8f264f949807d 100644
--- a/Doc/library/contextlib.rst
+++ b/Doc/library/contextlib.rst
@@ -151,9 +151,9 @@ Functions and classes provided:
    created by :func:`asynccontextmanager` to meet the requirement that context
    managers support multiple invocations in order to be used as decorators.
 
-  .. versionchanged:: 3.10
-     Async context managers created with :func:`asynccontextmanager` can
-     be used as decorators.
+   .. versionchanged:: 3.10
+      Async context managers created with :func:`asynccontextmanager` can
+      be used as decorators.
 
 
 .. function:: closing(thing)
diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md
index 5de4aa05398b55..272a0834cbfe24 100644
--- a/InternalDocs/garbage_collector.md
+++ b/InternalDocs/garbage_collector.md
@@ -17,26 +17,26 @@ value returned by this function is always 1 more as the function also has a refe
 to the object when called):
 
 ```pycon
-    >>> x = object()
-    >>> sys.getrefcount(x)
-    2
-    >>> y = x
-    >>> sys.getrefcount(x)
-    3
-    >>> del y
-    >>> sys.getrefcount(x)
-    2
+>>> x = object()
+>>> sys.getrefcount(x)
+2
+>>> y = x
+>>> sys.getrefcount(x)
+3
+>>> del y
+>>> sys.getrefcount(x)
+2
 ```
 
 The main problem with the reference counting scheme is that it does not handle reference
 cycles. For instance, consider this code:
 
 ```pycon
-    >>> container = []
-    >>> container.append(container)
-    >>> sys.getrefcount(container)
-    3
-    >>> del container
+>>> container = []
+>>> container.append(container)
+>>> sys.getrefcount(container)
+3
+>>> del container
 ```
 
 In this example, `container` holds a reference to itself, so even when we remove
@@ -199,26 +199,26 @@ variable `A`, and one self-referencing object which is completely
 unreachable:
 
 ```pycon
-    >>> import gc
-
-    >>> class Link:
-    ...    def __init__(self, next_link=None):
-    ...        self.next_link = next_link
-
-    >>> link_3 = Link()
-    >>> link_2 = Link(link_3)
-    >>> link_1 = Link(link_2)
-    >>> link_3.next_link = link_1
-    >>> A = link_1
-    >>> del link_1, link_2, link_3
-
-    >>> link_4 = Link()
-    >>> link_4.next_link = link_4
-    >>> del link_4
-
-    # Collect the unreachable Link object (and its .__dict__ dict).
-    >>> gc.collect()
-    2
+>>> import gc
+>>> 
+>>> class Link:
+...    def __init__(self, next_link=None):
+...        self.next_link = next_link
+...  
+>>> link_3 = Link()
+>>> link_2 = Link(link_3)
+>>> link_1 = Link(link_2)
+>>> link_3.next_link = link_1
+>>> A = link_1
+>>> del link_1, link_2, link_3
+>>> 
+>>> link_4 = Link()
+>>> link_4.next_link = link_4
+>>> del link_4
+>>> 
+>>> # Collect the unreachable Link object (and its .__dict__ dict).
+>>> gc.collect()
+2
 ```
 
 The GC starts with a set of candidate objects it wants to scan.  In the
@@ -439,9 +439,9 @@ These thresholds can be examined using the
 function:
 
 ```pycon
-    >>> import gc
-    >>> gc.get_threshold()
-    (700, 10, 10)
+>>> import gc
+>>> gc.get_threshold()
+(700, 10, 10)
 ```
 
 The content of these generations can be examined using the
@@ -449,38 +449,32 @@ The content of these generations can be examined using the
 specifically in a generation by calling `gc.collect(generation=NUM)`.
 
 ```pycon
-    >>> import gc
-    >>> class MyObj:
-    ...     pass
-    ...
-
-    # Move everything to the old generation so it's easier to inspect
-    # the young generation.
-
-    >>> gc.collect()
-    0
-
-    # Create a reference cycle.
-
-    >>> x = MyObj()
-    >>> x.self = x
-
-    # Initially the object is in the young generation.
-
-    >>> gc.get_objects(generation=0)
-    [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...]
-
-    # After a collection of the youngest generation the object
-    # moves to the old generation.
-
-    >>> gc.collect(generation=0)
-    0
-    >>> gc.get_objects(generation=0)
-    []
-    >>> gc.get_objects(generation=1)
-    []
-    >>> gc.get_objects(generation=2)
-    [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...]
+>>> import gc
+>>> class MyObj:
+...     pass
+...
+>>> # Move everything to the old generation so it's easier to inspect
+>>> # the young generation.
+>>> gc.collect()
+0
+>>> # Create a reference cycle.
+>>> x = MyObj()
+>>> x.self = x
+>>> 
+>>> # Initially the object is in the young generation.
+>>> gc.get_objects(generation=0)
+[..., <__main__.MyObj object at 0x7fbcc12a3400>, ...]
+>>> 
+>>> # After a collection of the youngest generation the object
+>>> # moves to the old generation.
+>>> gc.collect(generation=0)
+0
+>>> gc.get_objects(generation=0)
+[]
+>>> gc.get_objects(generation=1)
+[]
+>>> gc.get_objects(generation=2)
+[..., <__main__.MyObj object at 0x7fbcc12a3400>, ...]
 ```
 
 
@@ -563,18 +557,18 @@ the current tracking status of the object. Subsequent garbage collections may ch
 tracking status of the object.
 
 ```pycon
-      >>> gc.is_tracked(0)
-      False
-      >>> gc.is_tracked("a")
-      False
-      >>> gc.is_tracked([])
-      True
-      >>> gc.is_tracked(())
-      False
-      >>> gc.is_tracked({})
-      True
-      >>> gc.is_tracked({"a": 1})
-      True
+>>> gc.is_tracked(0)
+False
+>>> gc.is_tracked("a")
+False
+>>> gc.is_tracked([])
+True
+>>> gc.is_tracked(())
+False
+>>> gc.is_tracked({})
+True
+>>> gc.is_tracked({"a": 1})
+True
 ```
 
 Differences between GC implementations
diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py
index fb6d268e5869f4..b666533466e578 100644
--- a/Lib/test/test_threading.py
+++ b/Lib/test/test_threading.py
@@ -1196,11 +1196,11 @@ def f():
             except RuntimeError:
                 print('ok')
             else:
-                print('skip')
+                print('!skip!')
         """
         _, out, err = assert_python_ok("-u", "-c", code)
         out = out.strip()
-        if out == b'skip':
+        if b'!skip!' in out:
             self.skipTest('RLIMIT_NPROC had no effect; probably superuser')
         self.assertEqual(out, b'ok')
         self.assertEqual(err, b'')
diff --git a/Lib/test/test_winconsoleio.py b/Lib/test/test_winconsoleio.py
index a10d63dfdc9753..d9076e77c158a2 100644
--- a/Lib/test/test_winconsoleio.py
+++ b/Lib/test/test_winconsoleio.py
@@ -142,6 +142,29 @@ def test_write_empty_data(self):
         with ConIO('CONOUT$', 'w') as f:
             self.assertEqual(f.write(b''), 0)
 
+    @requires_resource('console')
+    def test_write(self):
+        testcases = []
+        with ConIO('CONOUT$', 'w') as f:
+            for a in [
+                b'',
+                b'abc',
+                b'\xc2\xa7\xe2\x98\x83\xf0\x9f\x90\x8d',
+                b'\xff'*10,
+            ]:
+                for b in b'\xc2\xa7', b'\xe2\x98\x83', b'\xf0\x9f\x90\x8d':
+                    testcases.append(a + b)
+                    for i in range(1, len(b)):
+                        data = a + b[:i]
+                        testcases.append(data + b'z')
+                        testcases.append(data + b'\xff')
+                        # incomplete multibyte sequence
+                        with self.subTest(data=data):
+                            self.assertEqual(f.write(data), len(a))
+            for data in testcases:
+                with self.subTest(data=data):
+                    self.assertEqual(f.write(data), len(data))
+
     def assertStdinRoundTrip(self, text):
         stdin = open('CONIN$', 'r')
         old_stdin = sys.stdin
diff --git a/Misc/NEWS.d/next/Library/2024-09-13-18-24-27.gh-issue-124008.XaiPQx.rst b/Misc/NEWS.d/next/Library/2024-09-13-18-24-27.gh-issue-124008.XaiPQx.rst
new file mode 100644
index 00000000000000..cd6dd9a7a97e90
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-09-13-18-24-27.gh-issue-124008.XaiPQx.rst
@@ -0,0 +1,2 @@
+Fix possible crash (in debug build), incorrect output or returning incorrect
+value from raw binary ``write()`` when writing to console on Windows.
diff --git a/Misc/NEWS.d/next/Library/2024-11-27-14-06-35.gh-issue-123967.wxUmnW.rst b/Misc/NEWS.d/next/Library/2024-11-27-14-06-35.gh-issue-123967.wxUmnW.rst
new file mode 100644
index 00000000000000..788fe0c78ef257
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-27-14-06-35.gh-issue-123967.wxUmnW.rst
@@ -0,0 +1,2 @@
+Fix faulthandler for trampoline frames. If the top-most frame is a
+trampoline frame, skip it. Patch by Victor Stinner.
diff --git a/Modules/_io/winconsoleio.c b/Modules/_io/winconsoleio.c
index d7cb5abfdc0abd..3fa0301e337991 100644
--- a/Modules/_io/winconsoleio.c
+++ b/Modules/_io/winconsoleio.c
@@ -135,19 +135,67 @@ char _PyIO_get_console_type(PyObject *path_or_fd) {
 }
 
 static DWORD
-_find_last_utf8_boundary(const char *buf, DWORD len)
+_find_last_utf8_boundary(const unsigned char *buf, DWORD len)
 {
-    /* This function never returns 0, returns the original len instead */
-    DWORD count = 1;
-    if (len == 0 || (buf[len - 1] & 0x80) == 0) {
-        return len;
-    }
-    for (;; count++) {
-        if (count > 3 || count >= len) {
+    for (DWORD count = 1; count < 4 && count <= len; count++) {
+        unsigned char c = buf[len - count];
+        if (c < 0x80) {
+            /* No starting byte found. */
             return len;
         }
-        if ((buf[len - count] & 0xc0) != 0x80) {
-            return len - count;
+        if (c >= 0xc0) {
+            if (c < 0xe0 /* 2-bytes sequence */ ? count < 2 :
+                c < 0xf0 /* 3-bytes sequence */ ? count < 3 :
+                c < 0xf8 /* 4-bytes sequence */)
+            {
+                /* Incomplete multibyte sequence. */
+                return len - count;
+            }
+            /* Either complete or invalid sequence. */
+            return len;
+        }
+    }
+    /* Either complete 4-bytes sequence or invalid sequence. */
+    return len;
+}
+
+/* Find the number of UTF-8 bytes that corresponds to the specified number of
+ * wchars.
+ * I.e. find x <= len so that MultiByteToWideChar(CP_UTF8, 0, s, x, NULL, 0) == n.
+ *
+ * WideCharToMultiByte() cannot be used for this, because the UTF-8 -> wchar
+ * conversion is not reversible (invalid UTF-8 byte produces \ufffd which
+ * will be converted back to 3-bytes UTF-8 sequence \xef\xbf\xbd).
+ * So we need to use binary search.
+ */
+static DWORD
+_wchar_to_utf8_count(const unsigned char *s, DWORD len, DWORD n)
+{
+    DWORD start = 0;
+    while (1) {
+        DWORD mid = 0;
+        for (DWORD i = len / 2; i <= len; i++) {
+            mid = _find_last_utf8_boundary(s, i);
+            if (mid != 0) {
+                break;
+            }
+            /* The middle could split the first multibytes sequence. */
+        }
+        if (mid == len) {
+            return start + len;
+        }
+        if (mid == 0) {
+            mid = len > 1 ? len - 1 : 1;
+        }
+        DWORD wlen = MultiByteToWideChar(CP_UTF8, 0, s, mid, NULL, 0);
+        if (wlen <= n) {
+            s += mid;
+            start += mid;
+            len -= mid;
+            n -= wlen;
+        }
+        else {
+            len = mid;
         }
     }
 }
@@ -563,8 +611,10 @@ read_console_w(HANDLE handle, DWORD maxlen, DWORD *readlen) {
     int err = 0, sig = 0;
 
     wchar_t *buf = (wchar_t*)PyMem_Malloc(maxlen * sizeof(wchar_t));
-    if (!buf)
+    if (!buf) {
+        PyErr_NoMemory();
         goto error;
+    }
 
     *readlen = 0;
 
@@ -622,6 +672,7 @@ read_console_w(HANDLE handle, DWORD maxlen, DWORD *readlen) {
             Py_UNBLOCK_THREADS
             if (!newbuf) {
                 sig = -1;
+                PyErr_NoMemory();
                 break;
             }
             buf = newbuf;
@@ -645,8 +696,10 @@ read_console_w(HANDLE handle, DWORD maxlen, DWORD *readlen) {
     if (*readlen > 0 && buf[0] == L'\x1a') {
         PyMem_Free(buf);
         buf = (wchar_t *)PyMem_Malloc(sizeof(wchar_t));
-        if (!buf)
+        if (!buf) {
+            PyErr_NoMemory();
             goto error;
+        }
         buf[0] = L'\0';
         *readlen = 0;
     }
@@ -824,8 +877,10 @@ _io__WindowsConsoleIO_readall_impl(winconsoleio *self)
     bufsize = BUFSIZ;
 
     buf = (wchar_t*)PyMem_Malloc((bufsize + 1) * sizeof(wchar_t));
-    if (buf == NULL)
+    if (buf == NULL) {
+        PyErr_NoMemory();
         return NULL;
+    }
 
     while (1) {
         wchar_t *subbuf;
@@ -847,6 +902,7 @@ _io__WindowsConsoleIO_readall_impl(winconsoleio *self)
                                          (bufsize + 1) * sizeof(wchar_t));
             if (tmp == NULL) {
                 PyMem_Free(buf);
+                PyErr_NoMemory();
                 return NULL;
             }
             buf = tmp;
@@ -1022,43 +1078,49 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, PyTypeObject *cls,
         len = (DWORD)b->len;
 
     Py_BEGIN_ALLOW_THREADS
-    wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, NULL, 0);
-
     /* issue11395 there is an unspecified upper bound on how many bytes
        can be written at once. We cap at 32k - the caller will have to
        handle partial writes.
        Since we don't know how many input bytes are being ignored, we
        have to reduce and recalculate. */
-    while (wlen > 32766 / sizeof(wchar_t)) {
-        len /= 2;
+    const DWORD max_wlen = 32766 / sizeof(wchar_t);
+    /* UTF-8 to wchar ratio is at most 3:1. */
+    len = Py_MIN(len, max_wlen * 3);
+    while (1) {
         /* Fix for github issues gh-110913 and gh-82052. */
         len = _find_last_utf8_boundary(b->buf, len);
         wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, NULL, 0);
+        if (wlen <= max_wlen) {
+            break;
+        }
+        len /= 2;
     }
     Py_END_ALLOW_THREADS
 
-    if (!wlen)
-        return PyErr_SetFromWindowsErr(0);
+    if (!wlen) {
+        return PyLong_FromLong(0);
+    }
 
     wbuf = (wchar_t*)PyMem_Malloc(wlen * sizeof(wchar_t));
+    if (!wbuf) {
+        PyErr_NoMemory();
+        return NULL;
+    }
 
     Py_BEGIN_ALLOW_THREADS
     wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, wbuf, wlen);
     if (wlen) {
         res = WriteConsoleW(handle, wbuf, wlen, &n, NULL);
+#ifdef Py_DEBUG
+        if (res) {
+#else
         if (res && n < wlen) {
+#endif
             /* Wrote fewer characters than expected, which means our
              * len value may be wrong. So recalculate it from the
-             * characters that were written. As this could potentially
-             * result in a different value, we also validate that value.
+             * characters that were written.
              */
-            len = WideCharToMultiByte(CP_UTF8, 0, wbuf, n,
-                NULL, 0, NULL, NULL);
-            if (len) {
-                wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len,
-                    NULL, 0);
-                assert(wlen == len);
-            }
+            len = _wchar_to_utf8_count(b->buf, len, n);
         }
     } else
         res = 0;
diff --git a/Python/traceback.c b/Python/traceback.c
index 47b77c9108dd9a..e819909b6045c3 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -890,6 +890,8 @@ _Py_DumpASCII(int fd, PyObject *text)
 static void
 dump_frame(int fd, _PyInterpreterFrame *frame)
 {
+    assert(frame->owner != FRAME_OWNED_BY_CSTACK);
+
     PyCodeObject *code =_PyFrame_GetCode(frame);
     PUTS(fd, "  File ");
     if (code->co_filename != NULL
@@ -963,6 +965,17 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header)
 
     unsigned int depth = 0;
     while (1) {
+        if (frame->owner == FRAME_OWNED_BY_CSTACK) {
+            /* Trampoline frame */
+            frame = frame->previous;
+            if (frame == NULL) {
+                break;
+            }
+
+            /* Can't have more than one shim frame in a row */
+            assert(frame->owner != FRAME_OWNED_BY_CSTACK);
+        }
+
         if (MAX_FRAME_DEPTH <= depth) {
             if (MAX_FRAME_DEPTH < depth) {
                 PUTS(fd, "plus ");
@@ -971,20 +984,12 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header)
             }
             break;
         }
+
         dump_frame(fd, frame);
         frame = frame->previous;
         if (frame == NULL) {
             break;
         }
-        if (frame->owner == FRAME_OWNED_BY_CSTACK) {
-            /* Trampoline frame */
-            frame = frame->previous;
-        }
-        if (frame == NULL) {
-            break;
-        }
-        /* Can't have more than one shim frame in a row */
-        assert(frame->owner != FRAME_OWNED_BY_CSTACK);
         depth++;
     }
 }