Skip to content

Commit 094c76d

Browse files
committed
Fix SSE parsing to handle split CRLF across chunks
Previously, the SSE parser could incorrectly handle CRLF line endings when \r appeared at the end of one chunk and \n at the beginning of the next chunk, potentially treating them as two separate line breaks instead of a single CRLF sequence.

This fix implements proper CRLF handling by:
- Tracking when a chunk ends with \r using a skip_leading_lf flag
- Skipping a leading \n in the next chunk if the previous ended with \r
- Ensuring Unicode line/paragraph separators (U+2028/U+2029) are treated as regular content, not line breaks, per the SSE specification

Added comprehensive test coverage for the edge case of split CRLF sequences across chunk boundaries.
1 parent e781013 commit 094c76d

File tree

2 files changed

+53
-7
lines changed

2 files changed

+53
-7
lines changed

src/mcp/client/sse.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,23 +37,47 @@ async def compliant_aiter_sse(event_source: EventSource) -> AsyncIterator[Server
3737
"""
3838
decoder = SSEDecoder()
3939
buffer = b""
40+
41+
# Split on "\r\n", "\r", or "\n" only, no other new line characters.
42+
# https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream
43+
44+
# Note: this is tricky, because we could have a "\r" at the end of a chunk and not yet
45+
# know if the next chunk starts with a "\n" or not.
46+
skip_leading_lf = False
4047

4148
async for chunk in event_source.response.aiter_bytes():
4249
buffer += chunk
4350

44-
# Split on "\n" only (not U+2028/U+2029 or other anything else)
45-
# https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream
46-
while b"\n" in buffer:
47-
line_bytes, buffer = buffer.split(b"\n", 1)
48-
line = line_bytes.decode('utf-8', errors='replace').rstrip("\r")
51+
while len(buffer) != 0:
52+
if skip_leading_lf and buffer.startswith(b"\n"):
53+
buffer = buffer[1:]
54+
skip_leading_lf = False
55+
56+
# Find first "\r" or "\n"
57+
cr = buffer.find(b"\r")
58+
lf = buffer.find(b"\n")
59+
pos = cr if lf == -1 else lf if cr == -1 else min(cr, lf)
60+
61+
if pos == -1:
62+
# No lines, need another chunk
63+
break
64+
65+
line_bytes = buffer[:pos]
66+
buffer = buffer[pos + 1:]
67+
68+
# If we have a CR first, skip any LF immediately after (may be in next chunk)
69+
skip_leading_lf = (pos == cr)
70+
71+
line = line_bytes.decode('utf-8', errors='replace')
4972
sse = decoder.decode(line)
5073
if sse is not None:
5174
yield sse
5275

5376
# Process any remaining data in buffer
5477
if buffer:
5578
assert b"\n" not in buffer
56-
line = buffer.decode('utf-8', errors='replace').rstrip("\r")
79+
assert b"\r" not in buffer
80+
line = buffer.decode('utf-8', errors='replace')
5781
sse = decoder.decode(line)
5882
if sse is not None:
5983
yield sse

tests/client/test_sse_unicode.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,4 +136,26 @@ async def test_compliant_aiter_sse_handles_multiple_events():
136136

137137
# Default event type is "message"
138138
assert events[2].event == "message"
139-
assert events[2].data == "No event name"
139+
assert events[2].data == "No event name"
140+
141+
142+
async def test_compliant_aiter_sse_handles_split_crlf():
143+
"""Test that \r at end of chunk followed by \n in next chunk is treated as one newline."""
144+
145+
# Test case where \r is at the end of one chunk and \n starts the next
146+
# This should be treated as a single CRLF line ending, not two separate newlines
147+
test_data = [
148+
b'event: test\r', # \r at end of chunk
149+
b'\ndata: line1\r', # \n at start of next chunk, then another \r at end
150+
b'\ndata: line2\n\n', # \n at start, completing the CRLF
151+
]
152+
153+
event_source = create_mock_event_source(test_data)
154+
155+
events = [event async for event in compliant_aiter_sse(event_source)]
156+
157+
# Should get exactly one event with both data lines
158+
assert len(events) == 1
159+
assert events[0].event == "test"
160+
# The SSE decoder concatenates multiple data fields with \n
161+
assert events[0].data == "line1\nline2"

0 commit comments

Comments
 (0)