Skip to content

Commit 16ef76a

Browse files
Merge pull request #91 from patrickfournier/url-encode
Url encode srcset values
2 parents 3a90b9e + 43abef5 commit 16ef76a

File tree

2 files changed

+157
-19
lines changed

2 files changed

+157
-19
lines changed

pelican/plugins/image_process/image_process.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import shutil
1818
import subprocess
1919
import sys
20+
import urllib
2021
from urllib.parse import unquote, urlparse
2122
from urllib.request import pathname2url, url2pathname
2223

@@ -362,7 +363,7 @@ def compute_paths(img, settings, derivative):
362363
derivative_path = os.path.join(process_dir, derivative)
363364
# urljoin truncates leading ../ elements
364365
base_url = posixpath.join(
365-
posixpath.dirname(img["src"]), pathname2url(derivative_path)
366+
posixpath.dirname(img["src"]), pathname2url(str(derivative_path))
366367
)
367368

368369
PELICAN_V4 = 4
@@ -397,7 +398,7 @@ def compute_paths(img, settings, derivative):
397398
src_path = img_src_path.lstrip("/")
398399
source = os.path.join(settings["PATH"], src_path)
399400
base_path = os.path.join(
400-
settings["OUTPUT_PATH"], os.path.dirname(src_path), derivative_path
401+
settings["OUTPUT_PATH"], os.path.dirname(src_path), str(derivative_path)
401402
)
402403

403404
return Path(base_url, source, base_path, filename)
@@ -408,14 +409,22 @@ def process_img_tag(img, settings, derivative):
408409
process = settings["IMAGE_PROCESS"][derivative]
409410

410411
img["src"] = posixpath.join(path.base_url, path.filename)
411-
destination = os.path.join(path.base_path, path.filename)
412+
destination = os.path.join(str(path.base_path), path.filename)
412413

413414
if not isinstance(process, list):
414415
process = process["ops"]
415416

416417
process_image((path.source, destination, process), settings)
417418

418419

420+
def format_srcset_element(path, condition):
421+
# space and comma have special meaning in srcset
422+
if " " in path or "," in path:
423+
path = urllib.parse.quote(path)
424+
425+
return f"{path} {condition}"
426+
427+
419428
def build_srcset(img, settings, derivative):
420429
path = compute_paths(img, settings, derivative)
421430
process = settings["IMAGE_PROCESS"][derivative]
@@ -434,7 +443,7 @@ def build_srcset(img, settings, derivative):
434443
default_name = default
435444
elif isinstance(default, list):
436445
default_name = "default"
437-
destination = os.path.join(path.base_path, default_name, path.filename)
446+
destination = os.path.join(str(path.base_path), default_name, path.filename)
438447
process_image((path.source, destination, default), settings)
439448

440449
img["src"] = posixpath.join(path.base_url, default_name, path.filename)
@@ -445,8 +454,8 @@ def build_srcset(img, settings, derivative):
445454
srcset = []
446455
for src in process["srcset"]:
447456
file_path = posixpath.join(path.base_url, src[0], path.filename)
448-
srcset.append(f"{file_path} {src[0]}")
449-
destination = os.path.join(path.base_path, src[0], path.filename)
457+
srcset.append(format_srcset_element(file_path, src[0]))
458+
destination = os.path.join(str(path.base_path), src[0], path.filename)
450459
process_image((path.source, destination, src[1]), settings)
451460

452461
if len(srcset) > 0:
@@ -532,12 +541,8 @@ def convert_div_to_picture_tag(soup, img, group, settings, derivative):
532541

533542
srcset = []
534543
for src in s["srcset"]:
535-
srcset.append(
536-
"{} {}".format(
537-
os.path.join(s["base_url"], s["name"], src[0], s["filename"]),
538-
src[0],
539-
)
540-
)
544+
url = os.path.join(s["base_url"], s["name"], src[0], s["filename"])
545+
srcset.append(format_srcset_element(str(url), src[0]))
541546

542547
source = os.path.join(settings["PATH"], s["url"][1:])
543548
destination = os.path.join(s["base_path"], s["name"], src[0], s["filename"])
@@ -644,12 +649,8 @@ def process_picture(soup, img, group, settings, derivative):
644649
for s in sources:
645650
srcset = []
646651
for src in s["srcset"]:
647-
srcset.append(
648-
"{} {}".format(
649-
posixpath.join(s["base_url"], s["name"], src[0], s["filename"]),
650-
src[0],
651-
)
652-
)
652+
url = posixpath.join(s["base_url"], s["name"], src[0], s["filename"])
653+
srcset.append(format_srcset_element(str(url), src[0]))
653654

654655
source = os.path.join(settings["PATH"], s["url"][1:])
655656
destination = os.path.join(s["base_path"], s["name"], src[0], s["filename"])

pelican/plugins/image_process/test_image_process.py

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,8 @@ def test_path_normalization(mocker, orig_src, orig_img, new_src, new_img):
499499
),
500500
],
501501
)
502-
def test_picture_generation(mocker, orig_tag, new_tag, call_args):
502+
def test_html_and_pictures_generation(mocker, orig_tag, new_tag, call_args):
503+
"""Tests that the generated html is as expected and the images are processed."""
503504
process = mocker.patch("pelican.plugins.image_process.image_process.process_image")
504505

505506
settings = get_settings(
@@ -508,6 +509,8 @@ def test_picture_generation(mocker, orig_tag, new_tag, call_args):
508509

509510
assert harvest_images_in_fragment(orig_tag, settings) == new_tag
510511

512+
# Check that process_image was called with the expected arguments
513+
# and in the expecter order.
511514
for i, (orig_img, new_img, transform_params) in enumerate(call_args):
512515
assert process.mock_calls[i] == mocker.call(
513516
(
@@ -519,6 +522,140 @@ def test_picture_generation(mocker, orig_tag, new_tag, call_args):
519522
)
520523

521524

525+
@pytest.mark.parametrize(
526+
"orig_tag, new_tag",
527+
[
528+
# <img/> src attribute with no quotes, spaces or commas.
529+
(
530+
'<img class="image-process-thumb" src="/tmp/my&amp;_dir/my!_test.jpg" />',
531+
'<img class="image-process-thumb" '
532+
'src="/tmp/my&amp;_dir/derivs/thumb/my!_test.jpg"/>',
533+
),
534+
# <img/> src attribute with double quotes, spaces and commas.
535+
(
536+
'<img class="image-process-thumb" '
537+
'src="/tmp/my,&quot; dir/my &#34;test,.jpg" />',
538+
'<img class="image-process-thumb" '
539+
"src='/tmp/my,\" dir/derivs/thumb/my \"test,.jpg'/>",
540+
),
541+
# <img/> src attribute with single and double quotes, spaces and commas.
542+
(
543+
'<img class="image-process-thumb" '
544+
'src="/tmp/m\'y,&quot; dir/my &#34;test,.jpg" />',
545+
'<img class="image-process-thumb" '
546+
'src="/tmp/m\'y,&quot; dir/derivs/thumb/my &quot;test,.jpg"/>',
547+
),
548+
# <img/> srcset attribute with no quotes, spaces or commas.
549+
(
550+
'<img class="image-process-crisp" src="/tmp/my&amp;_dir/my!_test.jpg" />',
551+
'<img class="image-process-crisp" '
552+
'src="/tmp/my&amp;_dir/derivs/crisp/1x/my!_test.jpg" '
553+
'srcset="/tmp/my&amp;_dir/derivs/crisp/1x/my!_test.jpg 1x, '
554+
"/tmp/my&amp;_dir/derivs/crisp/2x/my!_test.jpg 2x, "
555+
'/tmp/my&amp;_dir/derivs/crisp/4x/my!_test.jpg 4x"/>',
556+
),
557+
# <img/> srcset attribute with double quotes, spaces and commas.
558+
(
559+
'<img class="image-process-crisp" '
560+
'src="/tmp/my,&quot; dir/my &#34;test,.jpg" />',
561+
'<img class="image-process-crisp" '
562+
"src='/tmp/my,\" dir/derivs/crisp/1x/my \"test,.jpg' "
563+
'srcset="/tmp/my%2C%22%20dir/derivs/crisp/1x/my%20%22test%2C.jpg 1x, '
564+
"/tmp/my%2C%22%20dir/derivs/crisp/2x/my%20%22test%2C.jpg 2x, "
565+
'/tmp/my%2C%22%20dir/derivs/crisp/4x/my%20%22test%2C.jpg 4x"/>',
566+
),
567+
# <img/> srcset attribute with single and double quotes, spaces and commas.
568+
(
569+
'<img class="image-process-crisp" '
570+
'src="/tmp/m\'y,&quot; dir/my &#34;test,.jpg" />',
571+
'<img class="image-process-crisp" '
572+
'src="/tmp/m\'y,&quot; dir/derivs/crisp/1x/my &quot;test,.jpg" '
573+
'srcset="/tmp/m%27y%2C%22%20dir/derivs/crisp/1x/my%20%22test%2C.jpg 1x, '
574+
"/tmp/m%27y%2C%22%20dir/derivs/crisp/2x/my%20%22test%2C.jpg 2x, "
575+
'/tmp/m%27y%2C%22%20dir/derivs/crisp/4x/my%20%22test%2C.jpg 4x"/>',
576+
),
577+
# <picture/> src and srcset attributes with no quotes, spaces or commas.
578+
(
579+
'<picture><source class="source-1" '
580+
'src="/my&amp;_dir/my!_pelican-closeup.jpg"/><img '
581+
'class="image-process-pict" src="/my&amp;_dir/my!_pelican.jpg"/>'
582+
"</picture>",
583+
'<picture><source media="(min-width: 640px)" sizes="100vw" '
584+
'srcset="/my&amp;_dir/derivs/pict/default/640w/'
585+
"my!_pelican.jpg 640w, "
586+
"/my&amp;_dir/derivs/pict/default/1024w/my!_pelican.jpg 1024w, "
587+
'/my&amp;_dir/derivs/pict/default/1600w/my!_pelican.jpg 1600w"/>'
588+
'<source srcset="/my&amp;_dir/derivs/pict/source-1/1x/'
589+
"my!_pelican-closeup.jpg 1x, "
590+
"/my&amp;_dir/derivs/pict/source-1/2x/"
591+
'my!_pelican-closeup.jpg 2x"/><img '
592+
'class="image-process-pict" '
593+
'src="/my&amp;_dir/derivs/pict/default/640w/my!_pelican.jpg"/>'
594+
"</picture>",
595+
),
596+
# <picture/> src and srcset attributes with double quotes, spaces and commas.
597+
(
598+
'<picture><source class="source-1" '
599+
'src="/my,&quot; dir/my &#34;pelican-closeup,.jpg"/><img '
600+
'class="image-process-pict" src="/my,&quot; dir/my &#34;pelican,.jpg"/>'
601+
"</picture>",
602+
'<picture><source media="(min-width: 640px)" sizes="100vw" '
603+
'srcset="/my%2C%22%20dir/derivs/pict/default/640w/'
604+
"my%20%22pelican%2C.jpg 640w, "
605+
"/my%2C%22%20dir/derivs/pict/default/1024w/my%20%22pelican%2C.jpg 1024w, "
606+
'/my%2C%22%20dir/derivs/pict/default/1600w/my%20%22pelican%2C.jpg 1600w"/>'
607+
'<source srcset="/my%2C%22%20dir/derivs/pict/source-1/1x/'
608+
"my%20%22pelican-closeup%2C.jpg 1x, "
609+
"/my%2C%22%20dir/derivs/pict/source-1/2x/"
610+
'my%20%22pelican-closeup%2C.jpg 2x"/><img '
611+
'class="image-process-pict" '
612+
"src='/my,\" dir/derivs/pict/default/640w/my \"pelican,.jpg'/>"
613+
"</picture>",
614+
),
615+
# <picture/> src and srcset attributes
616+
# with single and double quotes, spaces and commas.
617+
(
618+
'<picture><source class="source-1" '
619+
'src="/m\'y,&quot; dir/my &#34;pelican-closeup,.jpg"/><img '
620+
'class="image-process-pict" src="/m\'y,&quot; dir/my &#34;pelican,.jpg"/>'
621+
"</picture>",
622+
'<picture><source media="(min-width: 640px)" sizes="100vw" '
623+
'srcset="/m%27y%2C%22%20dir/derivs/pict/default/640w/'
624+
"my%20%22pelican%2C.jpg 640w, "
625+
"/m%27y%2C%22%20dir/derivs/pict/default/1024w/"
626+
"my%20%22pelican%2C.jpg 1024w, "
627+
"/m%27y%2C%22%20dir/derivs/pict/default/1600w/"
628+
'my%20%22pelican%2C.jpg 1600w"/>'
629+
'<source srcset="/m%27y%2C%22%20dir/derivs/pict/source-1/1x/'
630+
"my%20%22pelican-closeup%2C.jpg 1x, "
631+
"/m%27y%2C%22%20dir/derivs/pict/source-1/2x/"
632+
'my%20%22pelican-closeup%2C.jpg 2x"/><img '
633+
'class="image-process-pict" '
634+
'src="/m\'y,&quot; dir/derivs/pict/default/640w/my &quot;pelican,.jpg"/>'
635+
"</picture>",
636+
),
637+
],
638+
)
639+
def test_special_chars_in_image_path_are_handled_properly(mocker, orig_tag, new_tag):
640+
"""Tests that special characters in image paths are handled properly.
641+
642+
For the src attribute, single or double quotes may need to be escaped,
643+
according to the quotation mark used to enclose the attribute value.
644+
645+
For the srcset attribute, in addition to quotes, spaces and commas
646+
need to be url-encoded.
647+
648+
Related to issue #78 https://github.com/pelican-plugins/image-process/issues/78
649+
"""
650+
mocker.patch("pelican.plugins.image_process.image_process.process_image")
651+
652+
settings = get_settings(
653+
IMAGE_PROCESS=COMPLEX_TRANSFORMS, IMAGE_PROCESS_DIR="derivs"
654+
)
655+
656+
assert harvest_images_in_fragment(orig_tag, settings) == new_tag
657+
658+
522659
def process_image_mock_exif_tool_started(image, settings):
523660
assert ExifTool._instance is not None
524661

0 commit comments

Comments
 (0)