Skip to content

Commit 87b9660

Browse files
committed
feat(thumbnails): add cover image support with rero-invenio-thumbnails
* Add rero-invenio-thumbnails dependency for book cover retrieval
* Configure RERO_INVENIO_THUMBNAILS_FILES_DIR in config.py
* Fix Flask endpoint name: api_thumbnails.get_thumbnail
* Fix URL parameter bug: weight → width

API Endpoints:
* Add /cover/<isbn> API endpoint for cover image retrieval
  - Supports ISBN-10 and ISBN-13 formats
  - Accepts cached, width, and height query parameters
  - Returns JSON with thumbnail URL and metadata

Template Filters:
* Add get_cover_art template filter for Jinja2 templates
  - Supports electronicLocator coverImage URLs
  - Falls back to ISBN-based thumbnail lookup
  - Handles multiple ISBNs in sorted order

CLI Commands:
* Add add_cover_urls command to bulk process documents
  - Supports --commit flag for database persistence
  - Supports --cached/--no-cached for thumbnail caching
  - Supports --scroll for Elasticsearch timeout configuration
  - Supports --pids option for processing specific documents
  - Excludes documents already having cover images
  - Sorts PIDs numerically for consistent processing

Document Extensions:
* Add AddCoverUrlExtension for automatic cover URL injection
  - Automatically adds cover URLs on document create/commit
  - Checks for existing cover images to avoid duplicates
  - Configurable caching support

Tasks:
* Add add_cover_urls shared task for batch processing
  - Used by CLI command and can be called programmatically
  - Supports filtering by PIDs or processing all documents
  - Updates timestamp tracking for monitoring

Co-Authored-by: Peter Weber <peter.weber@rero.ch>
1 parent 85f98ce commit 87b9660

File tree

26 files changed

+937
-130
lines changed

26 files changed

+937
-130
lines changed

data/thumbnails/9782607000423.jpg

22.7 KB
Loading

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ dependencies = [
5252
## RERO specific python modules
5353
"rero-invenio-base (>=0.3.0)",
5454
"rero-invenio-files (>=1.0.0,<2.0.0)",
55+
# TODO: remove the git dependency once the module is released on PyPI
56+
"rero-invenio-thumbnails @ git+https://github.com/rero/rero-invenio-thumbnails.git",
5557
"flask-wiki (>=1.0.0)",
5658
## RERO ILS specific python modules
5759
"PyYAML (>=5.3.1)",

rero_ils/config.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3499,6 +3499,9 @@ def _(x):
34993499
#: page is displayed on RERO-ILS frontpage.
35003500
RERO_ILS_UI_GIT_HASH = None
35013501

3502+
#: Thumbnails directory configuration
3503+
RERO_INVENIO_THUMBNAILS_FILES_DIR = "./data/thumbnails"
3504+
35023505
#: RERO_ILS MEF base url could be changed.
35033506
RERO_ILS_MEF_REF_BASE_URL = os.environ.get("RERO_ILS_MEF_REF_BASE_URL", "mef.rero.ch")
35043507
#: RERO_ILS MEF specific configurations.
@@ -3536,8 +3539,6 @@ def _(x):
35363539

35373540
RERO_ILS_HELP_PAGE = "https://github.com/rero/rero-ils/wiki/Public-demo-help"
35383541

3539-
#: Cover service
3540-
RERO_ILS_THUMBNAIL_SERVICE_URL = "https://services.test.rero.ch/cover"
35413542

35423543
#: Entities
35433544
RERO_ILS_AGENTS_SOURCES = ["idref", "gnd", "rero"]

rero_ils/modules/cli/utils.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
import yaml
3939
from celery import current_app as current_celery
4040
from dojson.contrib.marc21.utils import create_record
41-
from elasticsearch_dsl.query import Q
4241
from flask import current_app
4342
from flask.cli import with_appcontext
4443
from invenio_db import db
@@ -55,9 +54,9 @@
5554
from werkzeug.local import LocalProxy
5655
from werkzeug.security import gen_salt
5756

58-
from rero_ils.modules.documents.api import Document, DocumentsSearch
57+
from rero_ils.modules.documents.api import Document
5958
from rero_ils.modules.documents.dojson.contrib.marc21tojson.rero import marc21
60-
from rero_ils.modules.documents.views import get_cover_art
59+
from rero_ils.modules.documents.tasks import add_cover_urls as task_add_cover_urls
6160
from rero_ils.modules.entities.remote_entities.api import RemoteEntity
6261
from rero_ils.modules.files.cli import load_files
6362
from rero_ils.modules.items.api import Item
@@ -1579,26 +1578,25 @@ def token_create(name, user, scopes, internal, access_token):
15791578
click.secho("No user found", fg="red")
15801579

15811580

1582-
@utils.command("add_cover_urls")
1583-
@click.option("-v", "--verbose", "verbose", is_flag=True, default=False)
1581+
@utils.command()
1582+
@click.option("-c", "--commit", "commit", is_flag=True, default=False, help="Commit changes to database.")
1583+
@click.option("-v", "--verbose", "verbose", is_flag=True, default=False, help="Verbose print.")
1584+
@click.option("--cached/--no-cached", "cached", default=True, help="Use cached thumbnails.")
1585+
@click.option("-s", "--scroll", "scroll", default="60m", help="Elasticsearch scroll timeout.")
1586+
@click.option(
1587+
"-p", "--pids", "pids_file", type=click.File("r"), default=None, help="File with document pids to process."
1588+
)
15841589
@with_appcontext
1585-
def add_cover_urls(verbose):
1590+
def add_cover_urls(commit, verbose, cached, scroll, pids_file):
15861591
"""Add cover urls to all documents with isbns."""
1587-
click.secho("Add cover urls.", fg="green")
1588-
search = (
1589-
DocumentsSearch()
1590-
.filter("term", identifiedBy__type="bf:Isbn")
1591-
.filter("bool", must_not=[Q("term", electronicLocator__content="coverImage")])
1592-
.params(preserve_order=True)
1593-
.sort({"pid": {"order": "asc"}})
1594-
.source("pid")
1595-
)
1596-
for idx, hit in enumerate(search.scan()):
1597-
pid = hit.pid
1598-
record = Document.get_record_by_pid(pid)
1599-
url = get_cover_art(record=record, save_cover_url=True)
1592+
1593+
pids = None
1594+
if pids_file:
1595+
pids = [line.strip() for line in pids_file if line.strip()]
16001596
if verbose:
1601-
click.echo(f"{idx}:\tdocument: {pid}\t{url}")
1597+
click.secho(f"Processing {len(pids)} documents from file", fg="green")
1598+
1599+
return task_add_cover_urls(commit=commit, verbose=verbose, cached=cached, scroll=scroll, pids=pids)
16021600

16031601

16041602
@utils.command()

rero_ils/modules/documents/api.py

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242

4343
from .dumpers import document_indexer_dumper, document_replace_refs_dumper
4444
from .extensions import (
45+
AddCoverUrlExtension,
4546
AddMEFPidExtension,
4647
EditionStatementExtension,
4748
ProvisionActivitiesExtension,
@@ -115,13 +116,14 @@ class Document(IlsRecord):
115116
enable_jsonref = False
116117

117118
_extensions = [
118-
OperationLogObserverExtension(),
119+
AddCoverUrlExtension(),
119120
AddMEFPidExtension("subjects", "contribution", "genreForm"),
121+
DeleteRelatedLocalFieldExtension(),
122+
EditionStatementExtension(),
123+
OperationLogObserverExtension(),
120124
ProvisionActivitiesExtension(),
121125
SeriesStatementExtension(),
122-
EditionStatementExtension(),
123126
TitleExtension(),
124-
DeleteRelatedLocalFieldExtension(),
125127
]
126128

127129
def _validate(self, **kwargs):
@@ -490,17 +492,40 @@ def document_types(self):
490492
document_types.append(main_type)
491493
return document_types or ["docmaintype_other"]
492494

493-
def add_cover_url(self, url, dbcommit=False, reindex=False):
494-
"""Adds electronicLocator with coverImage to document."""
495-
electronic_locators = self.get("electronicLocator", [])
496-
for electronic_locator in electronic_locators:
497-
e_content = electronic_locator.get("content")
498-
e_type = electronic_locator.get("type")
499-
if e_content == "coverImage" and e_type == "relatedResource" and electronic_locator.get("url") == url:
495+
def add_cover_url(self, url, provider=None, dbcommit=False, reindex=False, force=False):
496+
"""Add or replace a coverImage electronicLocator on the document.
497+
498+
- If a ``coverImage``/``relatedResource`` locator already exists and
499+
``force=False``: no-op, returns ``(self, False)``.
500+
- If a ``coverImage``/``relatedResource`` locator already exists and
501+
``force=True``: replaces its URL and provider note in place.
502+
- If no ``coverImage``/``relatedResource`` locator exists: appends one.
503+
504+
:param url: str - the cover image URL to store.
505+
:param provider: str - optional provider/attribution note.
506+
:param dbcommit: bool - commit the change to the database.
507+
:param reindex: bool - reindex the document after update.
508+
:param force: bool - overwrite an existing coverImage locator.
509+
:returns: tuple (updated_record, changed) where changed is True if the
510+
document was modified.
511+
"""
512+
locators = self.get("electronicLocator", [])
513+
if existing := next(
514+
(loc for loc in locators if loc.get("content") == "coverImage" and loc.get("type") == "relatedResource"),
515+
None,
516+
):
517+
if not force:
500518
return self, False
501-
electronic_locators.append({"content": "coverImage", "type": "relatedResource", "url": url})
502-
self["electronicLocator"] = electronic_locators
503-
self = self.update(data=self, commit=True, dbcommit=dbcommit, reindex=reindex)
519+
existing["url"] = url
520+
if provider:
521+
existing["note"] = provider
522+
else:
523+
locator = {"content": "coverImage", "type": "relatedResource", "url": url}
524+
if provider:
525+
locator["note"] = provider
526+
locators.append(locator)
527+
self["electronicLocator"] = locators
528+
self.update(data=self, commit=True, dbcommit=dbcommit, reindex=reindex)
504529
return self, True
505530

506531
def resolve(self):

rero_ils/modules/documents/api_views.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,29 @@
2424
from flask import request as flask_request
2525
from invenio_jsonschemas import current_jsonschemas
2626
from invenio_jsonschemas.errors import JSONSchemaNotFound
27+
from rero_invenio_thumbnails import get_thumbnail_url
2728

2829
from rero_ils.modules.decorators import check_logged_as_librarian
2930

3031
from ..utils import cached
3132
from .api import Document
32-
from .utils import get_remote_cover
3333

3434
api_blueprint = Blueprint("api_documents", __name__, url_prefix="/document")
3535

3636

3737
@api_blueprint.route("/cover/<isbn>")
38-
@cached(timeout=5 * 60, query_string=True) # 5 minutes timeout
3938
def cover(isbn):
4039
"""Document cover service."""
41-
return jsonify(get_remote_cover(isbn))
40+
41+
use_cache = flask_request.args.get("cached", default="true").lower() != "false"
42+
try:
43+
url, provider = get_thumbnail_url(isbn, cached=use_cache)
44+
except Exception as e:
45+
current_app.logger.warning(f"Thumbnail lookup failed for ISBN {isbn}: {e}")
46+
return jsonify({"success": False, "isbn": isbn})
47+
if url:
48+
return jsonify({"success": True, "image": url, "isbn": isbn, "provider": provider})
49+
return jsonify({"success": False, "isbn": isbn})
4250

4351

4452
@api_blueprint.route("/<pid>/availability", methods=["GET"])

rero_ils/modules/documents/dojson/contrib/jsontodc/model.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def json_to_descriptions(self, key, value):
128128
if key == "supplementaryContent":
129129
descriptions.append(data)
130130
elif key == "summary":
131-
descriptions += [label["value"] for label in data.get("label", [])]
131+
descriptions += [v for v in (label.get("value") for label in data.get("label", [])) if v is not None]
132132
elif label := data.get("label"):
133133
descriptions.append(label)
134134
if descriptions:
@@ -178,6 +178,8 @@ def json_to_dates(self, key, value):
178178
def json_to_types(self, key, value):
179179
"""Get types data."""
180180
main_type = value.get("main_type")
181+
if not main_type:
182+
return None
181183
if subtype_type := value.get("subtype"):
182184
return " / ".join([_(main_type), _(subtype_type)])
183185
return _(main_type)

rero_ils/modules/documents/dojson/contrib/jsontomarc21/model.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ def do_contribution(contribution, source_order):
131131
return None, None, False, False
132132
if not (preferred_name := entity.get("preferred_name")):
133133
preferred_name = entity.get(f"authorized_access_point_{to_marc21.language}")
134+
if not preferred_name:
135+
return None, None, False, False
134136
result = {}
135137
conference = False
136138
surname = False
@@ -270,10 +272,10 @@ def get_name(resource, pid):
270272
item_hits = ItemsSearch().filter("terms", pid=list(item_pids)).scan()
271273
for item_hit in item_hits:
272274
item_data = item_hit.to_dict()
273-
item_result = result
275+
item_result = dict(result)
274276
item_result["item"] = {
275277
"barcode": item_data.get("barcode"),
276-
"all_number": item_data.get("all_number"),
278+
"call_number": item_data.get("call_number"),
277279
"second_call_number": item_data.get("second_call_number"),
278280
"enumerationAndChronology": item_data.get("enumerationAndChronology"),
279281
"url": item_data.get("url"),

rero_ils/modules/documents/dojson/contrib/marc21tojson/loc/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- coding: utf-8 -*-
22
#
33
# RERO ILS
4-
# Copyright (C) 2019-2022 RERO
4+
# Copyright (C) 2019-2026 RERO
55
# Copyright (C) 2019-2022 UCLOUVAIN
66
#
77
# This program is free software: you can redistribute it and/or modify

rero_ils/modules/documents/dojson/contrib/marc21tojson/slsp/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- coding: utf-8 -*-
22
#
33
# RERO ILS
4-
# Copyright (C) 2019-2022 RERO
4+
# Copyright (C) 2019-2026 RERO
55
# Copyright (C) 2019-2022 UCLOUVAIN
66
#
77
# This program is free software: you can redistribute it and/or modify

0 commit comments

Comments
 (0)