Skip to content

Commit 62bdb20

Browse files
kyodai0000, Ondřej Ruml, Arsenii Pogodin, Martin Čorovčák
committed
feat: UI support for zip content previewing
* New entrypoint for zip enhanced previewer
* Two new routes for preview and download container items
* Zip enhanced previewer
* Decorator to pass container item to views
* View function to preview specific container item and download it
* JS and HTML files

Co-authored-by: Ondřej Ruml <Ondrej.Ruml@cesnet.cz>
Co-authored-by: Arsenii Pogodin <pogodars@cvut.cz>
Co-authored-by: Martin Čorovčák <Martin.Corovcak@cesnet.cz>
1 parent 0e5af31 commit 62bdb20

File tree

12 files changed

+473
-2
lines changed

12 files changed

+473
-2
lines changed

invenio_app_rdm/config.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -805,7 +805,9 @@ def files_rest_permission_factory(obj, action):
805805
"record_detail": "/records/<pid_value>",
806806
"record_export": "/records/<pid_value>/export/<export_format>",
807807
"record_file_preview": "/records/<pid_value>/preview/<path:filename>",
808+
"record_container_item_preview": "/records/<pid_value>/preview/<path:filename>/container/<path:path>",
808809
"record_file_download": "/records/<pid_value>/files/<path:filename>",
810+
"record_container_item_download": "/records/<pid_value>/files/<path:filename>/container/<path:path>",
809811
"record_thumbnail": "/records/<pid_value>/thumb<int:size>",
810812
"record_media_file_download": "/records/<pid_value>/media-files/<path:filename>",
811813
"record_from_pid": "/<any({schemes}):pid_scheme>/<path:pid_value>",
@@ -1168,14 +1170,28 @@ def github_link_render(record):
11681170
"video_videojs",
11691171
"audio_videojs",
11701172
"ipynb",
1171-
"zip",
1173+
"previewable_zip",
11721174
"txt",
11731175
]
11741176
"""Preferred previewers."""
11751177

11761178
PREVIEWER_ABSTRACT_TEMPLATE = "invenio_previewer/rdm_abstract_previewer.html"
11771179
"""Override the abstract template with an RDM-specific one."""
11781180

1181+
# Previewer names for items extracted from a container file (e.g. a zip).
# NOTE(review): presumably tried in this order when choosing a previewer for a
# container item -- confirm against the code that reads this setting.
# The list names the plain "zip" previewer, not "previewable_zip".
CONTAINER_ITEM_PREVIEWER_PREFERENCE = [
    "csv_papaparsejs",
    "pdfjs",
    "simple_image",
    "json_prismjs",
    "xml_prismjs",
    "mistune",
    "video_videojs",
    "audio_videojs",
    "ipynb",
    "zip",
    "txt",
]
1194+
11791195
RECORDS_RESOURCES_IMAGE_FORMATS = ["." + ext for ext in IIIF_FORMATS.keys()]
11801196
"""RECORDS_RESOURCES_IMAGE_FORMATS must contain all possible IIIF formats to ensure their metadata is extracted."""
11811197

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# This file is part of Invenio.
4+
# Copyright (C) 2025 CESNET i.a.l.e.
5+
#
6+
# Invenio is free software; you can redistribute it and/or modify it
7+
# under the terms of the MIT License; see LICENSE file for more details.
8+
9+
"""Simple ZIP archive previewer."""
10+
11+
import sys
12+
13+
from flask import render_template
14+
from invenio_access.permissions import system_identity
15+
from invenio_base import invenio_url_for
16+
from invenio_previewer.proxies import current_previewer
17+
from invenio_previewer.views import is_container_item_previewable
18+
19+
from ..views.records import PreviewContainerItem
20+
21+
# File extensions this previewer module declares as previewable.
# NOTE(review): presumably read by invenio-previewer when matching files to
# previewers -- confirm against the previewer registration code.
previewable_extensions = ["zip"]
22+
23+
24+
def create_container_item_preview_link(record_id, container_filename, item_path):
    """Build the preview URL of a single item stored inside a container file.

    :param record_id: Value passed as ``pid_value`` of the route.
    :param container_filename: Key of the container file (the specific .zip),
        passed as ``filename`` of the route.
    :param item_path: Path of the item inside the container, passed as
        ``path`` of the route.
    :returns: The result of ``invenio_url_for`` for the
        ``record_container_item_preview`` endpoint.
    """
    return invenio_url_for(
        "invenio_app_rdm_records.record_container_item_preview",
        pid_value=record_id,
        filename=container_filename,
        path=item_path,
    )
34+
35+
36+
def convert_zip_list_container(entries, folders, record_id, container_filename):
    """Convert a flat container listing into a nested folder/item tree.

    The input is the ``entries`` and ``folders`` lists of the structure
    returned by ``files.list_container(...).to_dict()``. Every element is a
    dict whose ``"key"`` is a "/"-separated path inside the container.

    :param entries: File entries. ``size``, ``compressed_size``, ``mime_type``,
        ``crc`` and ``links`` are copied to the output when present.
    :param folders: Folder entries. ``links`` is copied when present.
    :param record_id: Record identifier used to build item preview links.
    :param container_filename: Key of the container file used to build item
        preview links.
    :returns: Root node ``{"type": "folder", "id": -1, "children": [...]}``.
        Folder nodes carry ``name``, ``type``, ``id`` and ``children``; item
        nodes carry ``name``, ``type``, ``id`` plus the copied metadata.
        Previewable items get a ``"preview"`` entry in their ``links``.

    The input dicts are never mutated: ``links`` are copied before the preview
    link is added.
    """
    # One counter shared by folders and items so every generated id is unique.
    counter = iter(range(sys.maxsize))

    def create_parent_hierarchy(node, path):
        """Descend from ``node`` along ``path``, creating missing folders."""
        for path_item in path:
            items = node["children"]
            for child_item in items:
                # Only descend into folders: a file item that happens to share
                # the segment name has no "children" to attach to.
                if (
                    child_item["type"] == "folder"
                    and child_item["name"] == path_item
                ):
                    node = child_item
                    break
            else:
                # No folder with that name yet: create an implicit one
                # (it has no "links" because no listing entry describes it).
                node = {
                    "type": "folder",
                    "name": path_item,
                    "id": f"folder{next(counter)}",
                    "children": [],
                }
                items.append(node)
        return node

    def convert_file_entry(key, node):
        """Convert one file entry of the listing into an item node."""
        converted = {
            "name": key,
            "type": "item",
            "id": f"item{next(counter)}",
        }

        # Copy metadata fields if they exist
        for field in ("size", "compressed_size", "mime_type", "crc"):
            if field in node:
                converted[field] = node[field]

        # Copy the links so that adding "preview" below does not modify the
        # dict owned by the caller.
        links = node.get("links")
        if links is not None:
            converted["links"] = dict(links)

        # create preview link
        container_item_extension = key.split(".")[-1].lower()
        if is_container_item_previewable(container_item_extension):
            # setdefault: the entry may come without any links at all.
            converted.setdefault("links", {})["preview"] = (
                create_container_item_preview_link(
                    record_id, container_filename, node["key"]
                )
            )
        return converted

    def convert_folder(key, node):
        """Convert one folder entry of the listing into a folder node."""
        return {
            "name": key,
            "type": "folder",
            "id": f"folder{next(counter)}",
            "children": [],
            # Tolerate folder entries without links, like file entries do.
            "links": dict(node.get("links") or {}),
        }

    # Root folder
    root = {"type": "folder", "id": -1, "children": []}

    # Folders first, sorted by key so that a parent always precedes its
    # children and is therefore found (not re-created) by the hierarchy walk.
    for folder in sorted(folders, key=lambda x: x["key"]):
        # Zip directory names conventionally end with "/"; strip it so the
        # last path segment is the folder name instead of an empty string.
        folder_key = folder["key"].rstrip("/").split("/")
        converted = convert_folder(folder_key[-1], folder)
        hierarchy_position = create_parent_hierarchy(root, folder_key[:-1])
        hierarchy_position["children"].append(converted)

    for entry in sorted(entries, key=lambda x: x["key"]):
        entry_key = entry["key"].split("/")
        converted = convert_file_entry(entry_key[-1], entry)
        hierarchy_position = create_parent_hierarchy(root, entry_key[:-1])
        hierarchy_position["children"].append(converted)

    return root
111+
112+
113+
def can_preview(file):
    """Tell whether this previewer handles ``file``.

    A file qualifies when it is local, has the ``.zip`` extension and is a
    top-level file, i.e. not a ``PreviewContainerItem``.
    """
    if not file.is_local():
        return False
    if not file.has_extensions(".zip"):
        return False
    # Items extracted from a container are left to other previewers.
    return not isinstance(file, PreviewContainerItem)
120+
121+
122+
def preview(file):
    """Render the zip previewer template with the archive content as a tree.

    The container listing is fetched through the RDM records file service
    using ``system_identity`` and converted with
    ``convert_zip_list_container``. The children of the resulting root node
    are passed to the template as ``tree``.

    :param file: Preview file object; ``file.record["id"]`` and
        ``file.filename`` identify the container to list.
    :returns: The rendered ``invenio_previewer/previewable_zip.html`` template.
    """
    from invenio_rdm_records.proxies import current_rdm_records_service

    record_id = file.record["id"]
    container_key = file.filename

    listing = current_rdm_records_service.files.list_container(
        system_identity, record_id, container_key
    ).to_dict()
    root = convert_zip_list_container(
        listing["entries"], listing["folders"], record_id, container_key
    )

    return render_template(
        "invenio_previewer/previewable_zip.html",
        file=file,
        tree=root["children"],
        limit_reached=False,
        error=None,
        js_bundles=current_previewer.js_bundles + ["previewable-zip.js"],
        css_bundles=current_previewer.css_bundles + ["zip_css.css"],
    )

invenio_app_rdm/records_ui/views/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@
4848
from .records import (
4949
draft_not_found_error,
5050
not_found_error,
51+
record_container_item_download,
52+
record_container_item_preview,
5153
record_detail,
5254
record_export,
5355
record_file_download,
@@ -114,13 +116,25 @@ def create_blueprint(app):
114116
default_view_func=record_file_preview,
115117
)
116118
)
119+
blueprint.add_url_rule(
120+
**create_url_rule(
121+
routes["record_container_item_preview"],
122+
default_view_func=record_container_item_preview,
123+
)
124+
)
117125

118126
blueprint.add_url_rule(
119127
**create_url_rule(
120128
routes["record_file_download"],
121129
default_view_func=record_file_download,
122130
)
123131
)
132+
blueprint.add_url_rule(
133+
**create_url_rule(
134+
routes["record_container_item_download"],
135+
default_view_func=record_container_item_download,
136+
)
137+
)
124138
blueprint.add_url_rule(
125139
**create_url_rule(
126140
routes["record_thumbnail"],

invenio_app_rdm/records_ui/views/decorators.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Copyright (C) 2019-2025 CERN.
44
# Copyright (C) 2019-2025 Northwestern University.
55
# Copyright (C) 2021 TU Wien.
6+
# Copyright (C) 2025 CESNET i.a.l.e.
67
#
78
# Invenio App RDM is free software; you can redistribute it and/or modify it
89
# under the terms of the MIT License; see LICENSE file for more details.
@@ -260,6 +261,48 @@ def view(**kwargs):
260261
return decorator
261262

262263

264+
def pass_container_item():
    """Decorator factory passing an item extracted from a container (e.g. zip).

    The decorated view reads ``pid_value``, ``filename`` and ``path`` from its
    keyword arguments, asks the ``files`` service to extract that container
    item on behalf of ``g.identity`` and calls the view with the result added
    as the ``container_item`` keyword argument.

    If ``RecordDeletedException`` is raised by the extraction or by the view
    itself, the response is a redirect to the record detail page.
    """

    def decorator(f):
        @wraps(f)
        def view(**kwargs):
            pid_value = kwargs.get("pid_value")
            lookup = dict(
                id_=pid_value,
                file_key=kwargs.get("filename"),
                identity=g.identity,
                path=kwargs.get("path"),
            )

            from invenio_records_resources.proxies import current_service_registry

            files = current_service_registry.get("files")

            try:
                kwargs["container_item"] = files.extract_container_item(**lookup)
                return f(**kwargs)
            except RecordDeletedException:
                # Redirect to the record page which has proper tombstone handling
                record_url = url_for(
                    "invenio_app_rdm_records.record_detail",
                    pid_value=pid_value,
                )
                # Use 302 (temporary) instead of 301 since records can be restored
                return redirect(record_url, code=302)

        return view

    return decorator
304+
305+
263306
def pass_file_metadata(f):
264307
"""Decorate a view to pass a file's metadata using the files service."""
265308

0 commit comments

Comments
 (0)