Skip to content
Open
4 changes: 2 additions & 2 deletions .github/workflows/cifuzz.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ jobs:
steps:
- name: Build Fuzzers
id: build
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@675ddfb89ae1c614f1dfa99d18b91cd6d1d6b88b # master 2026-04-10
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@9ff5089dbb11800055b6bc1af919a84b06dee2c8 # master 2026-04-27
with:
oss-fuzz-project-name: "python-multipart"
language: python

- name: Run Fuzzers
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@675ddfb89ae1c614f1dfa99d18b91cd6d1d6b88b # master 2026-04-10
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@9ff5089dbb11800055b6bc1af919a84b06dee2c8 # master 2026-04-27
with:
oss-fuzz-project-name: "python-multipart"
language: python
Expand Down
39 changes: 38 additions & 1 deletion python_multipart/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,27 @@ class MultipartState(IntEnum):
"""Default maximum size of a single multipart header line, including syntax overhead."""


def _split_mime_parameters(value: str) -> list[str]:
"""Split a MIME parameter string on semicolons that are outside quoted strings."""
parts: list[str] = []
start = 0
in_quotes = False
i = 0
while i < len(value):
c = value[i]
if in_quotes and c == "\\":
i += 2 # skip the escaped character
continue
if c == '"':
in_quotes = not in_quotes
elif c == ";" and not in_quotes:
parts.append(value[start:i])
start = i + 1
i += 1
parts.append(value[start:])
return parts


def parse_options_header(value: str | bytes | None) -> tuple[bytes, dict[bytes, bytes]]:
"""Parses a Content-Type header into a value in the following format: (content_type, {parameters})."""
# Uses email.message.Message to parse the header as described in PEP 594.
Expand All @@ -167,11 +188,27 @@ def parse_options_header(value: str | bytes | None) -> tuple[bytes, dict[bytes,
if ";" not in value:
return (value.lower().strip().encode("latin-1"), {})

ctype_part, params_part = value.split(";", 1)

# Pre-check for mixed RFC2231 parameter continuations (e.g., `filename*` and `filename*0*`).
# email.message.Message.get_params() handles these maliciously formed headers
# differently in Python 3.12 vs 3.13. We validate them here to ensure consistent behavior.
# _split_mime_parameters is used to avoid false positives from semicolons inside quoted values.
param_names = [p.split("=", 1)[0].strip().lower() for p in _split_mime_parameters(params_part) if "=" in p]
for name in param_names:
if "*" in name:
base, _, rest = name.partition("*")
if rest.rstrip("*").isdigit() and f"{base}*" in param_names:
return (ctype_part.lower().strip().encode("latin-1"), {})

# Split at the first semicolon, to get our value and then options.
# ctype, rest = value.split(b';', 1)
message = Message()
message["content-type"] = value
params = message.get_params()
try:
params = message.get_params()
except (TypeError, ValueError):
return (ctype_part.lower().strip().encode("latin-1"), {})
# If there were no parameters, this would have already returned above
assert params, "At least the content type value should be present"
ctype = params.pop(0)[0].encode("latin-1")
Expand Down
21 changes: 21 additions & 0 deletions tests/test_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,27 @@ def test_handles_rfc_2231(self) -> None:

self.assertEqual(p[b"param"], b"encoded message")

def test_rejects_oversized_rfc_2231_index(self) -> None:
    """A 4301-digit RFC 2231 section index yields the bare content type and no parameters."""
    oversized_index = "1" * 4301
    content_type, params = parse_options_header("text/plain; filename*" + oversized_index + "*=utf-8''x")

    self.assertEqual(content_type, b"text/plain")
    self.assertEqual(params, {})

def test_rejects_mixed_rfc_2231_continuations(self) -> None:
    """A header mixing ``filename*`` with ``filename*0*`` yields the content type and no parameters."""
    header = "text/plain; filename*=utf-8''a; filename*0*=utf-8''b"
    content_type, params = parse_options_header(header)

    self.assertEqual(content_type, b"text/plain")
    self.assertEqual(params, {})

def test_quoted_value_containing_rfc_2231_like_text(self) -> None:
    """RFC 2231-looking text inside a quoted value must not hide the real ``filename*`` parameter."""
    # The semicolon and "filename*0*" sit inside the quoted ``notes`` value, so they
    # are not a real parameter and must not be paired with the genuine ``filename*``.
    header = "text/plain; notes=\"a;filename*0*=utf-8''junk\"; filename*=utf-8''real.txt"
    content_type, params = parse_options_header(header)

    self.assertEqual(content_type, b"text/plain")
    # The "filename*0*" inside the notes string should be ignored.
    self.assertEqual(params[b"filename"], b"real.txt")


class TestBaseParser(unittest.TestCase):
def setUp(self) -> None:
Expand Down