[PR #10601/f7cac7e6 backport][3.12] Reduce WebSocket buffer slicing overhead (#10640)

patchback[bot] · bdraco · web-flow · commit 9d4c09a2e866 · 2025-03-30T22:16:09.000Z
**This is a backport of PR #10601 as merged into master (f7cac7e).**  ## What do these changes do? Use a `const unsigned char *` for the buffer (Cython will automatically extract it using `__Pyx_PyBytes_AsUString`) as it's a lot faster than copying around `PyBytes` objects. We do need to be careful that all slices are bounded and we bounds-check everything to make sure we do not do an out-of-bounds read, since Cython does not bounds-check C strings. I checked that all accesses to `buf_cstr` are preceded by a bounds check, but it would be good to get another set of eyes on that to verify in the `self._state == READ_PAYLOAD` block that we will never try to read out of bounds. <img width="376" alt="Screenshot 2025-03-19 at 10 21 54 AM" src="https://github.com/user-attachments/assets/a340ffa2-f09b-4aff-a4f7-c487dae186c8" /> ## Are there changes in behavior for the user? performance improvement ## Is it a substantial burden for the maintainers to support this? no There is a small risk that someone could remove a bounds check in the future and create a memory safety issue; however, in this case it's likely we would already be trying to read data that wasn't there if we are missing the bounds checking, so the pure Python version would throw if we are testing properly. Co-authored-by: J. Nick Koston <nick@koston.org>
diff --git a/CHANGES/10601.misc.rst b/CHANGES/10601.misc.rst
@@ -0,0 +1 @@
+Improved performance of WebSocket buffer handling -- by :user:`bdraco`.
diff --git a/aiohttp/_websocket/reader_c.pxd b/aiohttp/_websocket/reader_c.pxd
@@ -93,6 +93,7 @@ cdef class WebSocketReader:
         chunk_size="unsigned int",
         chunk_len="unsigned int",
         buf_length="unsigned int",
+        buf_cstr="const unsigned char *",
         first_byte="unsigned char",
         second_byte="unsigned char",
         end_pos="unsigned int",
diff --git a/aiohttp/_websocket/reader_py.py b/aiohttp/_websocket/reader_py.py
@@ -328,14 +328,15 @@ def parse_frame(
 
         start_pos: int = 0
         buf_length = len(buf)
+        buf_cstr = buf
 
         while True:
             # read header
             if self._state == READ_HEADER:
                 if buf_length - start_pos < 2:
                     break
-                first_byte = buf[start_pos]
-                second_byte = buf[start_pos + 1]
+                first_byte = buf_cstr[start_pos]
+                second_byte = buf_cstr[start_pos + 1]
                 start_pos += 2
 
                 fin = (first_byte >> 7) & 1
@@ -400,14 +401,14 @@ def parse_frame(
                 if length_flag == 126:
                     if buf_length - start_pos < 2:
                         break
-                    first_byte = buf[start_pos]
-                    second_byte = buf[start_pos + 1]
+                    first_byte = buf_cstr[start_pos]
+                    second_byte = buf_cstr[start_pos + 1]
                     start_pos += 2
                     self._payload_length = first_byte << 8 | second_byte
                 elif length_flag > 126:
                     if buf_length - start_pos < 8:
                         break
-                    data = buf[start_pos : start_pos + 8]
+                    data = buf_cstr[start_pos : start_pos + 8]
                     start_pos += 8
                     self._payload_length = UNPACK_LEN3(data)[0]
                 else:
@@ -419,7 +420,7 @@ def parse_frame(
             if self._state == READ_PAYLOAD_MASK:
                 if buf_length - start_pos < 4:
                     break
-                self._frame_mask = buf[start_pos : start_pos + 4]
+                self._frame_mask = buf_cstr[start_pos : start_pos + 4]
                 start_pos += 4
                 self._state = READ_PAYLOAD
 
@@ -435,10 +436,10 @@ def parse_frame(
                 if self._frame_payload_len:
                     if type(self._frame_payload) is not bytearray:
                         self._frame_payload = bytearray(self._frame_payload)
-                    self._frame_payload += buf[start_pos:end_pos]
+                    self._frame_payload += buf_cstr[start_pos:end_pos]
                 else:
                     # Fast path for the first frame
-                    self._frame_payload = buf[start_pos:end_pos]
+                    self._frame_payload = buf_cstr[start_pos:end_pos]
 
                 self._frame_payload_len += end_pos - start_pos
                 start_pos = end_pos
@@ -464,6 +465,7 @@ def parse_frame(
                 self._frame_payload_len = 0
                 self._state = READ_HEADER
 
-        self._tail = buf[start_pos:] if start_pos < buf_length else b""
+        # XXX: Cython needs slices to be bounded, so we can't omit the slice end here.
+        self._tail = buf_cstr[start_pos:buf_length] if start_pos < buf_length else b""
 
         return frames

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+Improved performance of WebSocket buffer handling -- by :user:`bdraco`.