Skip to content

Commit 1bc8d53

Browse files
[PR #8657/6c6ecfaf backport][3.10] Fix multipart reading with split boundary (#8658)
**This is a backport of PR #8657 as merged into master (6c6ecfa).** --------- Co-authored-by: Sam Bull <[email protected]>
1 parent 491106e commit 1bc8d53

File tree

3 files changed

+77
-4
lines changed

3 files changed

+77
-4
lines changed

CHANGES/8653.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed multipart reading when the stream buffer splits the boundary over several read() calls -- by :user:`Dreamsorcerer`.

aiohttp/multipart.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ def __init__(
266266
) -> None:
267267
self.headers = headers
268268
self._boundary = boundary
269+
self._boundary_len = len(boundary) + 2 # Boundary + \r\n
269270
self._content = content
270271
self._default_charset = default_charset
271272
self._at_eof = False
@@ -346,15 +347,25 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
346347
# Reads content chunk of body part with unknown length.
347348
# The Content-Length header for body part is not necessary.
348349
assert (
349-
size >= len(self._boundary) + 2
350+
size >= self._boundary_len
350351
), "Chunk size must be greater or equal than boundary length + 2"
351352
first_chunk = self._prev_chunk is None
352353
if first_chunk:
353354
self._prev_chunk = await self._content.read(size)
354355

355-
chunk = await self._content.read(size)
356-
self._content_eof += int(self._content.at_eof())
357-
assert self._content_eof < 3, "Reading after EOF"
356+
chunk = b""
357+
# content.read() may return less than size, so we need to loop to ensure
358+
# we have enough data to detect the boundary.
359+
while len(chunk) < self._boundary_len:
360+
chunk += await self._content.read(size)
361+
self._content_eof += int(self._content.at_eof())
362+
assert self._content_eof < 3, "Reading after EOF"
363+
if self._content_eof:
364+
break
365+
if len(chunk) > size:
366+
self._content.unread_data(chunk[size:])
367+
chunk = chunk[:size]
368+
358369
assert self._prev_chunk is not None
359370
window = self._prev_chunk + chunk
360371
sub = b"\r\n" + self._boundary

tests/test_multipart.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import io
33
import json
44
import pathlib
5+
import sys
56
import zlib
67
from unittest import mock
78

@@ -754,6 +755,66 @@ async def test_invalid_boundary(self) -> None:
754755
with pytest.raises(ValueError):
755756
await reader.next()
756757

758+
@pytest.mark.skipif(sys.version_info < (3, 10), reason="Needs anext()")
759+
async def test_read_boundary_across_chunks(self) -> None:
760+
class SplitBoundaryStream:
    """In-memory fake stream whose reads split multipart boundaries.

    ``content`` is a queue of byte chunks.  Each ``read()`` call hands out
    exactly one queued chunk, so the boundary ``--foobar`` reaches the
    consumer spread over several reads.  The object exposes the four
    methods exercised on it: ``read``, ``readline``, ``at_eof`` and
    ``unread_data``.
    """

    def __init__(self) -> None:
        # One list item == the payload returned by one read() call.
        self.content = [
            b"--foobar\r\n\r\n",
            b"Hello,\r\n-",
            b"-fo",
            b"ob",
            b"ar\r\n",
            b"\r\nwor",
            b"ld!",
            b"\r\n--f",
            b"oobar--",
        ]

    async def read(self, size=None) -> bytes:
        """Return the next queued chunk, or ``b""`` once the queue is empty.

        When *size* is given, the chunk must not exceed it; the fake never
        splits a chunk itself, so an oversized chunk is a fixture error.
        With the default ``size=None`` no limit is checked.
        """
        if not self.content:
            # Exhausted stream: report EOF with empty bytes instead of
            # failing with IndexError on pop().
            return b""
        chunk = self.content.pop(0)
        if size is not None:
            assert len(chunk) <= size, "queued chunk is larger than requested size"
        return chunk

    def at_eof(self) -> bool:
        """Return True when no queued data is left."""
        return not self.content

    async def readline(self) -> bytes:
        """Return bytes up to and including the next ``b"\\n"``.

        Chunks are joined until a newline shows up or the queue runs dry.
        Anything after the newline is pushed back to the front of the
        queue.  At EOF the remaining bytes are returned as-is (possibly
        ``b""``); no newline is invented.
        """
        buffer = b""
        while self.content and b"\n" not in buffer:
            buffer += self.content.pop(0)
        # partition() yields an empty separator when no newline was found,
        # so ``line + newline`` never contains a fabricated b"\n".
        line, newline, rest = buffer.partition(b"\n")
        if rest:
            self.content.insert(0, rest)
        return line + newline

    def unread_data(self, data: bytes) -> None:
        """Push *data* back so that it is returned before any queued data."""
        if not data:
            # Nothing to put back; avoid queueing an empty chunk that
            # would make at_eof() report False with no data available.
            return
        if self.content:
            # Merge with the head chunk so a following read() still
            # returns the pushed-back bytes first.
            self.content[0] = data + self.content[0]
        else:
            self.content.append(data)
796+
797+
stream = SplitBoundaryStream()
798+
reader = aiohttp.MultipartReader(
799+
{CONTENT_TYPE: 'multipart/related;boundary="foobar"'}, stream
800+
)
801+
part = await anext(reader)
802+
result = await part.read_chunk(10)
803+
assert result == b"Hello,"
804+
result = await part.read_chunk(10)
805+
assert result == b""
806+
assert part.at_eof()
807+
808+
part = await anext(reader)
809+
result = await part.read_chunk(10)
810+
assert result == b"world!"
811+
result = await part.read_chunk(10)
812+
assert result == b""
813+
assert part.at_eof()
814+
815+
with pytest.raises(StopAsyncIteration):
816+
await anext(reader)
817+
757818
async def test_release(self) -> None:
758819
with Stream(
759820
newline.join(

0 commit comments

Comments
 (0)