Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ Bugs fixed
Patch by Jeremy Maitin-Shepard.
* #13939: LaTeX: page break can separate admonition title from contents.
Patch by Jean-François B.
* #14006: Use ``urllib.request.DataHandler`` in ``parse_data_uri`` to fix
parsing of non-base64 data URIs.
Patch by Shengyu Zhang.


Testing
Expand Down
30 changes: 10 additions & 20 deletions sphinx/util/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

from __future__ import annotations

import base64
from pathlib import Path
from typing import TYPE_CHECKING, NamedTuple, overload
from urllib.request import DataHandler, build_opener

import imagesize

Expand Down Expand Up @@ -90,26 +90,16 @@ def get_image_extension(mimetype: str) -> str | None:
def parse_data_uri(uri: str) -> DataURI | None:
    """Parse a ``data:`` URI into its media type, charset and payload.

    Decoding is delegated to :class:`urllib.request.DataHandler`, so the
    payload may be either base64-encoded or percent-encoded.

    :param uri: The URI to parse.
    :returns: A ``DataURI`` built from the media type, the charset and the
        decoded payload bytes, or ``None`` when *uri* does not start with
        ``data:``.
    :raises ValueError: If *uri* starts with ``data:`` but the handler
        rejects it as malformed.
    """
    if not uri.startswith('data:'):
        return None

    try:
        response = build_opener(DataHandler).open(uri)
    except ValueError as e:
        # Re-raise with a stable message while keeping the original
        # error available as ``__cause__``.
        msg = 'malformed data URI'
        raise ValueError(msg) from e

    # data:[<MIME-type>][;charset=<encoding>][;base64],<data>
    # Close the response once the headers and payload have been read.
    with response:
        info = response.info()
        mimetype = info.get_content_type() or 'text/plain'
        charset = info.get_content_charset() or 'US-ASCII'
        image_data = response.read()

    return DataURI(mimetype, charset, image_data)


Expand Down
11 changes: 11 additions & 0 deletions tests/test_util/test_util_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,14 @@ def test_parse_data_uri() -> None:
)
with pytest.raises(ValueError, match=r'malformed data URI'):
parse_data_uri(uri)

# not base64
uri = (
'data:image/svg+xml,%3Csvg%20width%3D%22100%22%20height%3D%22100%22%20'
'xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%3E%3Ccircle%20cx'
'%3D%2250%22%20cy%3D%2250%22%20r%3D%2240%22%20fill%3D%22blue%22%2F%3E'
'%3C%2Fsvg%3E'
)
image = parse_data_uri(uri)
assert image is not None
assert image.mimetype == 'image/svg+xml'
Loading