Skip to content

Commit 9d4c09a

Browse files
[PR #10601/f7cac7e6 backport][3.12] Reduce WebSocket buffer slicing overhead (#10640)
**This is a backport of PR #10601 as merged into master (f7cac7e).** <!-- Thank you for your contribution! --> ## What do these changes do? Use a `const unsigned char *` for the buffer (Cython will automatically extract it using `__Pyx_PyBytes_AsUString`), as it's a lot faster than copying around `PyBytes` objects. We do need to be careful that all slices are bounded, and we bounds check everything to make sure we do not do an out-of-bounds read, since Cython does not bounds check C strings. I checked that all accesses to `buf_cstr` are preceded by a bounds check, but it would be good to get another set of eyes on that to verify in the `self._state == READ_PAYLOAD` block that we will never try to read out of bounds. <img width="376" alt="Screenshot 2025-03-19 at 10 21 54 AM" src="https://github.com/user-attachments/assets/a340ffa2-f09b-4aff-a4f7-c487dae186c8" /> ## Are there changes in behavior for the user? performance improvement ## Is it a substantial burden for the maintainers to support this? no There is a small risk that someone could remove a bounds check in the future and create a memory safety issue; however, in this case it's likely we would already be trying to read data that wasn't there if we are missing the bounds checking, so the pure Python version would throw if we are testing properly. Co-authored-by: J. Nick Koston <[email protected]>
1 parent 8d54f1f commit 9d4c09a

File tree

3 files changed

+13
-9
lines changed

3 files changed

+13
-9
lines changed

CHANGES/10601.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improved performance of WebSocket buffer handling -- by :user:`bdraco`.

aiohttp/_websocket/reader_c.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ cdef class WebSocketReader:
9393
chunk_size="unsigned int",
9494
chunk_len="unsigned int",
9595
buf_length="unsigned int",
96+
buf_cstr="const unsigned char *",
9697
first_byte="unsigned char",
9798
second_byte="unsigned char",
9899
end_pos="unsigned int",

aiohttp/_websocket/reader_py.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -328,14 +328,15 @@ def parse_frame(
328328

329329
start_pos: int = 0
330330
buf_length = len(buf)
331+
buf_cstr = buf
331332

332333
while True:
333334
# read header
334335
if self._state == READ_HEADER:
335336
if buf_length - start_pos < 2:
336337
break
337-
first_byte = buf[start_pos]
338-
second_byte = buf[start_pos + 1]
338+
first_byte = buf_cstr[start_pos]
339+
second_byte = buf_cstr[start_pos + 1]
339340
start_pos += 2
340341

341342
fin = (first_byte >> 7) & 1
@@ -400,14 +401,14 @@ def parse_frame(
400401
if length_flag == 126:
401402
if buf_length - start_pos < 2:
402403
break
403-
first_byte = buf[start_pos]
404-
second_byte = buf[start_pos + 1]
404+
first_byte = buf_cstr[start_pos]
405+
second_byte = buf_cstr[start_pos + 1]
405406
start_pos += 2
406407
self._payload_length = first_byte << 8 | second_byte
407408
elif length_flag > 126:
408409
if buf_length - start_pos < 8:
409410
break
410-
data = buf[start_pos : start_pos + 8]
411+
data = buf_cstr[start_pos : start_pos + 8]
411412
start_pos += 8
412413
self._payload_length = UNPACK_LEN3(data)[0]
413414
else:
@@ -419,7 +420,7 @@ def parse_frame(
419420
if self._state == READ_PAYLOAD_MASK:
420421
if buf_length - start_pos < 4:
421422
break
422-
self._frame_mask = buf[start_pos : start_pos + 4]
423+
self._frame_mask = buf_cstr[start_pos : start_pos + 4]
423424
start_pos += 4
424425
self._state = READ_PAYLOAD
425426

@@ -435,10 +436,10 @@ def parse_frame(
435436
if self._frame_payload_len:
436437
if type(self._frame_payload) is not bytearray:
437438
self._frame_payload = bytearray(self._frame_payload)
438-
self._frame_payload += buf[start_pos:end_pos]
439+
self._frame_payload += buf_cstr[start_pos:end_pos]
439440
else:
440441
# Fast path for the first frame
441-
self._frame_payload = buf[start_pos:end_pos]
442+
self._frame_payload = buf_cstr[start_pos:end_pos]
442443

443444
self._frame_payload_len += end_pos - start_pos
444445
start_pos = end_pos
@@ -464,6 +465,7 @@ def parse_frame(
464465
self._frame_payload_len = 0
465466
self._state = READ_HEADER
466467

467-
self._tail = buf[start_pos:] if start_pos < buf_length else b""
468+
# XXX: Cython needs slices to be bounded, so we can't omit the slice end here.
469+
self._tail = buf_cstr[start_pos:buf_length] if start_pos < buf_length else b""
468470

469471
return frames

0 commit comments

Comments
 (0)