Skip to content

Commit 6c6ecfa

Browse files
Fix multipart reading with split boundary (#8657)
1 parent f93cba4 commit 6c6ecfa

File tree

3 files changed

+77
-4
lines changed

3 files changed

+77
-4
lines changed

CHANGES/8653.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed multipart reading when the stream buffer splits the boundary over several ``read()`` calls -- by :user:`Dreamsorcerer`.

aiohttp/multipart.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ def __init__(
266266
) -> None:
267267
self.headers = headers
268268
self._boundary = boundary
269+
self._boundary_len = len(boundary) + 2 # Boundary + \r\n
269270
self._content = content
270271
self._default_charset = default_charset
271272
self._at_eof = False
@@ -371,15 +372,25 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
371372
# Reads content chunk of body part with unknown length.
372373
# The Content-Length header for body part is not necessary.
373374
assert (
374-
size >= len(self._boundary) + 2
375+
size >= self._boundary_len
375376
), "Chunk size must be greater or equal than boundary length + 2"
376377
first_chunk = self._prev_chunk is None
377378
if first_chunk:
378379
self._prev_chunk = await self._content.read(size)
379380

380-
chunk = await self._content.read(size)
381-
self._content_eof += int(self._content.at_eof())
382-
assert self._content_eof < 3, "Reading after EOF"
381+
chunk = b""
382+
# content.read() may return less than size, so we need to loop to ensure
383+
# we have enough data to detect the boundary.
384+
while len(chunk) < self._boundary_len:
385+
chunk += await self._content.read(size)
386+
self._content_eof += int(self._content.at_eof())
387+
assert self._content_eof < 3, "Reading after EOF"
388+
if self._content_eof:
389+
break
390+
if len(chunk) > size:
391+
self._content.unread_data(chunk[size:])
392+
chunk = chunk[:size]
393+
383394
assert self._prev_chunk is not None
384395
window = self._prev_chunk + chunk
385396
sub = b"\r\n" + self._boundary

tests/test_multipart.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import io
44
import json
55
import pathlib
6+
import sys
67
import zlib
78
from typing import Any, Optional
89
from unittest import mock
@@ -657,6 +658,66 @@ async def test_invalid_boundary(self) -> None:
657658
with pytest.raises(ValueError):
658659
await reader.next()
659660

661+
@pytest.mark.skipif(sys.version_info < (3, 10), reason="Needs anext()")
async def test_read_boundary_across_chunks(self) -> None:
    """Read two body parts whose boundary lines arrive split across reads.

    The stub stream below hands out the payload in small pieces, several of
    which cut a ``--foobar`` boundary in the middle, and the test checks that
    each part's data is still returned without any boundary bytes.
    """

    class SplitBoundaryStream:
        """Minimal stream stub that returns one pre-cut piece per read()."""

        def __init__(self) -> None:
            # Pieces are served in order; several of them end or start
            # part-way through a boundary line.
            self.content = [
                b"--foobar\r\n\r\n",
                b"Hello,\r\n-",
                b"-fo",
                b"ob",
                b"ar\r\n",
                b"\r\nwor",
                b"ld!",
                b"\r\n--f",
                b"oobar--",
            ]

        async def read(self, size: Optional[Any] = None) -> bytes:
            # Serve exactly one queued piece; it must fit the requested size.
            piece = self.content.pop(0)
            assert len(piece) <= size
            return piece

        def at_eof(self) -> bool:
            return not self.content

        async def readline(self) -> bytes:
            # Accumulate pieces until a newline shows up (or data runs out),
            # then push whatever follows the newline back to the front.
            buffered = b""
            while self.content and b"\n" not in buffered:
                buffered += self.content.pop(0)
            head, _, tail = buffered.partition(b"\n")
            if tail:
                self.content.insert(0, tail)
            return head + b"\n"

        def unread_data(self, data: bytes) -> None:
            # Returned data goes in front of whatever is still queued.
            if not self.content:
                self.content.append(data)
                return
            self.content[0] = data + self.content[0]

    stream = SplitBoundaryStream()
    reader = aiohttp.MultipartReader(
        {CONTENT_TYPE: 'multipart/related;boundary="foobar"'}, stream
    )

    # Each part yields its payload first, then an empty chunk at its end.
    for expected in (b"Hello,", b"world!"):
        part = await anext(reader)
        assert await part.read_chunk(10) == expected
        assert await part.read_chunk(10) == b""
        assert part.at_eof()

    with pytest.raises(StopAsyncIteration):
        await anext(reader)
720+
660721
async def test_release(self) -> None:
661722
with Stream(
662723
b"--:\r\n"

0 commit comments

Comments
 (0)