Skip to content

Commit 29bafa9

Browse files
Process images in content metadata (#99)
1 parent 4a0b3c4 commit 29bafa9

File tree

4 files changed

+232
-9
lines changed

4 files changed

+232
-9
lines changed

README.md

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ classes. It then maps the classes to a set of image processing
4444
instructions, computes new images, and modifies HTML code according to
4545
the instructions.
4646

47+
It can optionally scan the metadata of your content to update image URLs
48+
there.
49+
4750
### Define Transformations
4851

4952
The first step in using this module is to define some image
@@ -517,6 +520,60 @@ IMAGE_PROCESS_CLASS_PREFIX = "custom-prefix-"
517520
IMAGE_PROCESS_ADD_CLASS = False
518521
```
519522

523+
#### Converting Image Paths to URLs in Metadata
524+
525+
If you want *Image Process* to process images in the metadata
526+
of your content (for example, in the `og_image` field used by the `seo` and `pelican-open_graph` plugins),
527+
you can set the `IMAGE_PROCESS_METADATA` setting to a dictionary mapping
528+
metadata field names to transformation names. The transformation must be defined
529+
in the `IMAGE_PROCESS` setting as usual, and it must be
530+
an image replacement transformation (i.e., of type `image`).
531+
For example:
532+
533+
```python
534+
# pelicanconf.py
535+
536+
IMAGE_PROCESS_METADATA = {
537+
"og_image": "og-image-transform",
538+
}
539+
540+
IMAGE_PROCESS = {
541+
"og-image-transform": {"type": "image",
542+
"ops": ["scale_in 800 640 True"],
543+
},
544+
# ... possibly other transformations ...
545+
}
546+
```
547+
548+
*Image Process* will look for the specified
549+
metadata fields in your content and will apply the specified transformation
550+
to the image path found in the metadata value.
551+
552+
It is possible to override the transformation applied to a specific instance of a metadata field by prefixing
553+
the metadata value with `{transformation-name}`, where `transformation-name` is the name
554+
of a transformation in the `IMAGE_PROCESS` dictionary. For example, if you have defined
555+
`IMAGE_PROCESS_METADATA` as above, you can override the transformation for a specific article
556+
by setting its `og_image` metadata value to `{some-special-transformation}/path/to/image.jpg`,
557+
where `some-special-transformation` is a transformation defined in the `IMAGE_PROCESS`
558+
dictionary. Here is an example article using this feature:
559+
560+
```markdown
561+
# Example article
562+
Title: Example Article
563+
Date: 2024-06-01
564+
og_image: {some-special-transformation}/images/special-image.jpg
565+
566+
This article uses a special image for Open Graph.
567+
```
568+
569+
If you only want to process metadata fields for some articles, you can set the transformation to `None`
570+
in `IMAGE_PROCESS_METADATA` and add a `{transform-name}` prefix to the metadata value of
571+
selected articles.
572+
573+
*Image Process* will update the metadata field to the URL of the transformed image.
574+
The original metadata values are saved in the `image_process_original_metadata` dictionary
575+
of the content object, so that you can access them later if needed.
576+
520577
## Known Issues
521578

522579
* Pillow, when resizing animated GIF files, [does not return an animated file](https://github.com/pelican-plugins/image-process/issues/11).

RELEASE.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Release type: minor
2+
3+
- Process images in content metadata via new `IMAGE_PROCESS_METADATA` setting

pelican/plugins/image_process/image_process.py

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import subprocess
1919
import sys
2020
import urllib
21-
from urllib.parse import unquote, urlparse
21+
from urllib.parse import unquote, urljoin, urlparse
2222
from urllib.request import pathname2url, url2pathname
2323

2424
from bs4 import BeautifulSoup
@@ -389,15 +389,21 @@ def harvest_images_in_fragment(fragment, settings):
389389
return str(soup)
390390

391391

392-
def compute_paths(img, settings, derivative):
392+
def compute_paths(image_url, settings, derivative):
393+
# Backwards compatibility: accept either a string (image_url) or
394+
# a dict (img with "src" key)
395+
if isinstance(image_url, dict):
396+
image_url = image_url.get("src", "")
397+
logger.warning(f"{LOG_PREFIX} Deprecated use of dict for image_url.")
398+
393399
process_dir = settings["IMAGE_PROCESS_DIR"]
394-
img_src = urlparse(img["src"])
400+
img_src = urlparse(image_url)
395401
img_src_path = url2pathname(img_src.path.lstrip("/"))
396402
_img_src_dirname, filename = os.path.split(img_src_path)
397403
derivative_path = os.path.join(process_dir, derivative)
398404
# urljoin truncates leading ../ elements
399405
base_url = posixpath.join(
400-
posixpath.dirname(img["src"]), pathname2url(str(derivative_path))
406+
posixpath.dirname(image_url), pathname2url(str(derivative_path))
401407
)
402408

403409
PELICAN_V4 = 4
@@ -439,7 +445,7 @@ def compute_paths(img, settings, derivative):
439445

440446

441447
def process_img_tag(img, settings, derivative):
442-
path = compute_paths(img, settings, derivative)
448+
path = compute_paths(img["src"], settings, derivative)
443449
process = settings["IMAGE_PROCESS"][derivative]
444450

445451
img["src"] = posixpath.join(path.base_url, path.filename)
@@ -465,7 +471,7 @@ def format_srcset_element(path, condition):
465471

466472

467473
def build_srcset(img, settings, derivative):
468-
path = compute_paths(img, settings, derivative)
474+
path = compute_paths(img["src"], settings, derivative)
469475
process = settings["IMAGE_PROCESS"][derivative]
470476

471477
default = process["default"]
@@ -768,6 +774,74 @@ def process_image(image, settings):
768774
return i.width, i.height
769775

770776

777+
# Names of Pelican's own "{name}" linking directives, taken from Pelican's
# _link_replacer() in contents.py. A "{name}" prefix that matches one of these
# is not a transformation override and the metadata field is left untouched.
_PELICAN_LINK_DIRECTIVES = frozenset(
    {
        "filename",
        "attach",
        "static",
        "category",
        "tag",
        "author",
        "index",
    }
)


def process_metadata(generator, metadata):
    """Replace image paths found in content metadata by derivative image URLs.

    For every string-valued metadata field listed in the
    ``IMAGE_PROCESS_METADATA`` setting, the transformation mapped to that
    field (or the one named by a leading ``{transformation-name}`` prefix in
    the value) is applied to the referenced image, and the metadata value is
    replaced by the URL of the derivative image, joined to ``SITEURL``.

    The values that were replaced (with any ``{...}`` prefix already removed)
    are stored under ``metadata["image_process_original_metadata"]``; that key
    is only added when at least one field was processed.

    Args:
        generator: Object whose ``context`` mapping holds the settings
            (``IMAGE_PROCESS``, ``IMAGE_PROCESS_METADATA``, ``SITEURL``, ...).
        metadata: Metadata dictionary of one content item; modified in place.

    Raises:
        RuntimeError: If the selected transformation is not defined in
            ``IMAGE_PROCESS``, or if it is neither a list of operations nor a
            dictionary of type ``"image"``.
    """
    context = generator.context
    set_default_settings(context)

    transforms_by_field = context.get("IMAGE_PROCESS_METADATA", {})
    if not transforms_by_field:
        # No metadata field is configured, so there is nothing to rewrite.
        return

    site_url = context.get("SITEURL", "")
    original_values = {}

    # Iterate over a snapshot because values of ``metadata`` are reassigned
    # inside the loop.
    for key, value in list(metadata.items()):
        if not isinstance(value, str) or key not in transforms_by_field:
            continue

        derivative = transforms_by_field[key]

        # A leading "{name}" overrides the transformation configured for the
        # field; the remainder of the value is the image path.
        if value.startswith("{") and "}" in value:
            prefix, _, remainder = value.partition("}")
            derivative = prefix[1:]
            value = remainder.lstrip()

        # A field mapped to None is only processed when the value carries an
        # explicit "{name}" prefix.
        if derivative is None:
            continue

        # Ignore Pelican special linking directives to avoid conflicts.
        if derivative in _PELICAN_LINK_DIRECTIVES:
            logger.warning(
                f"{LOG_PREFIX} Skipping metadata key '{key}' "
                f"because it uses Pelican linking directive '{derivative}'."
            )
            continue

        try:
            process = context["IMAGE_PROCESS"][derivative]
        except KeyError as e:
            raise RuntimeError(f"Derivative {derivative} undefined.") from e

        # Only plain operation lists and "image" transformations yield a
        # single replacement image. ``.get`` makes a dictionary without a
        # "type" key fail with the explicit error below, not a bare KeyError.
        is_image_transform = isinstance(process, list) or (
            isinstance(process, dict) and process.get("type") == "image"
        )
        if not is_image_transform:
            raise RuntimeError(
                f'IMAGE_PROCESS_METADATA "{key}" must reference a transformation '
                'of type "image".'
            )

        path = compute_paths(value, context, derivative)

        original_values[key] = value
        metadata[key] = urljoin(site_url, posixpath.join(path.base_url, path.filename))
        destination = os.path.join(str(path.base_path), path.filename)

        ops = process if isinstance(process, list) else process["ops"]
        process_image((path.source, destination, ops), context)

    if original_values:
        metadata["image_process_original_metadata"] = original_values
843+
844+
771845
def dump_config(pelican):
772846
set_default_settings(pelican.settings)
773847

@@ -779,6 +853,7 @@ def dump_config(pelican):
779853

780854

781855
def register():
856+
signals.article_generator_context.connect(process_metadata)
782857
signals.content_written.connect(harvest_images)
783858
signals.feed_written.connect(harvest_feed_images)
784859
signals.finalized.connect(dump_config)

pelican/plugins/image_process/test_image_process.py

Lines changed: 91 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
compute_paths,
1515
harvest_images_in_fragment,
1616
process_image,
17+
process_metadata,
1718
set_default_settings,
1819
try_open_image,
1920
)
@@ -90,7 +91,7 @@ def get_settings(**kwargs):
9091
"OUTPUT_PATH": "output",
9192
"static_content": {},
9293
"filenames": {},
93-
"SITEURL": "//",
94+
"SITEURL": "https://www.example.com",
9495
"IMAGE_PROCESS": SINGLE_TRANSFORMS,
9596
}
9697
settings = DEFAULT_CONFIG.copy()
@@ -836,9 +837,9 @@ def test_try_open_image():
836837
assert not try_open_image(TEST_DATA.joinpath("folded_puzzle.png"))
837838
assert not try_open_image(TEST_DATA.joinpath("minimal.svg"))
838839

839-
img = {"src": "https://upload.wikimedia.org/wikipedia/commons/3/34/Exemple.png"}
840+
img_path = "https://upload.wikimedia.org/wikipedia/commons/3/34/Exemple.png"
840841
settings = get_settings(IMAGE_PROCESS_DIR="derivatives")
841-
path = compute_paths(img, settings, derivative="thumb")
842+
path = compute_paths(img_path, settings, derivative="thumb")
842843
with pytest.raises(FileNotFoundError):
843844
assert not try_open_image(path.source)
844845

@@ -901,6 +902,93 @@ def test_class_settings(mocker, orig_tag, new_tag, setting_overrides):
901902
assert harvest_images_in_fragment(orig_tag, settings) == new_tag
902903

903904

905+
@pytest.mark.parametrize(
    "orig_metadata, new_metadata, setting_overrides, should_process, transform_id, "
    "expected_output_path",
    [
        # Configured field is absent from the metadata: nothing changes.
        (
            {"title": "Test Article"},
            {"title": "Test Article"},
            {"IMAGE_PROCESS_METADATA": {"og_image": "crop"}},
            False,
            None,
            None,
        ),
        # Configured field is present: the configured transformation is used.
        (
            {"og_image": "/photos/test-image.jpg"},
            {
                "og_image": "https://www.example.com/photos/derivatives/crop/test-image.jpg",
                "image_process_original_metadata": {
                    "og_image": "/photos/test-image.jpg"
                },
            },
            {"IMAGE_PROCESS_METADATA": {"og_image": "crop"}},
            True,
            "crop",
            "photos/derivatives/crop/test-image.jpg",
        ),
        # A "{resize}" prefix overrides the configured transformation.
        (
            {"og_image": "{resize}/photos/test-image.jpg"},
            {
                "og_image": "https://www.example.com/photos/derivatives/resize/test-image.jpg",
                "image_process_original_metadata": {
                    "og_image": "/photos/test-image.jpg"
                },
            },
            {"IMAGE_PROCESS_METADATA": {"og_image": "crop"}},
            True,
            "resize",
            "photos/derivatives/resize/test-image.jpg",
        ),
        # Ignore Pelican special linking directives like {static} and {attach}.
        (
            {"og_image": "{static}/photos/test-image.jpg"},
            {"og_image": "{static}/photos/test-image.jpg"},
            {"IMAGE_PROCESS_METADATA": {"og_image": "crop"}},
            False,
            None,
            None,
        ),
    ],
)
def test_process_metadata_image(  # noqa: PLR0913
    mocker,
    orig_metadata,
    new_metadata,
    setting_overrides,
    should_process,
    transform_id,
    expected_output_path,
):
    """Check metadata rewriting and the resulting ``process_image`` call."""
    # Silence image transforms.
    process = mocker.patch("pelican.plugins.image_process.image_process.process_image")

    settings = get_settings(**setting_overrides)

    generator_stub = mocker.MagicMock()
    generator_stub.context = settings

    # Work on a copy so the parametrized input stays available for the checks.
    processed_metadata = dict(orig_metadata)
    process_metadata(generator_stub, processed_metadata)

    assert processed_metadata == new_metadata

    if not should_process:
        return

    # Strip an optional "{transformation}" prefix to get the bare image path.
    path = orig_metadata["og_image"]
    if path.startswith("{") and "}" in path:
        _prefix, _brace, remainder = path.partition("}")
        path = remainder.lstrip()

    expected_image = (
        os.path.join(settings["PATH"], path[1:]),
        os.path.join(settings["OUTPUT_PATH"], expected_output_path),
        SINGLE_TRANSFORMS[transform_id],
    )
    process.assert_called_once_with(expected_image, settings)

    assert processed_metadata["image_process_original_metadata"]["og_image"] == path
990+
991+
904992
def generate_test_images():
905993
settings = get_settings()
906994
image_count = 0

0 commit comments

Comments
 (0)