Skip to content

Commit e0ac575

Browse files
authored
Implement media archive upload endpoint (#391)
* Implement media archive upload endpoint * Fix tests * Fix tests * Another try * Version -> 1.1.0 * Fix tests
1 parent b140273 commit e0ac575

File tree

11 files changed

+378
-17
lines changed

11 files changed

+378
-17
lines changed

gramps_webapi/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@
1818
#
1919

2020
# make sure to match this version with the one in apispec.yaml
21-
__version__ = "1.0.0"
21+
__version__ = "1.1.0"

gramps_webapi/api/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
from .resources.file import MediaFileResource
4747
from .resources.filters import FilterResource, FiltersResource, FiltersResources
4848
from .resources.holidays import HolidayResource, HolidaysResource
49+
from .resources.import_media import MediaUploadZipResource
4950
from .resources.importers import (
5051
ImporterFileResource,
5152
ImporterResource,
@@ -344,6 +345,13 @@ def register_endpt(resource: Type[Resource], url: str, name: str):
344345
"media_archive_filename",
345346
)
346347

348+
# Media import
349+
register_endpt(
350+
MediaUploadZipResource,
351+
"/media/archive/upload/zip",
352+
"media_archive_upload_zip",
353+
)
354+
347355

348356
# Thumbnails
349357
@api_blueprint.route("/media/<string:handle>/thumbnail/<int:size>")

gramps_webapi/api/file.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,10 @@ def _check_path(self) -> None:
132132

133133
def file_exists(self) -> bool:
    """Check if the file exists.

    A path that ``_check_path`` rejects with ``ValueError`` is reported as
    not existing instead of propagating the error.
    """
    try:
        self._check_path()
    except ValueError:
        return False
    else:
        return Path(self.path_abs).is_file()
137140

138141
def get_file_object(self) -> BinaryIO:

gramps_webapi/api/media.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def filter_existing_files(
8888
"""Given a list of media objects, return the ones with existing files."""
8989
raise NotImplementedError
9090

91-
def get_media_size(self) -> int:
91+
def get_media_size(self, db_handle: Optional[DbReadBase] = None) -> int:
9292
"""Return the total disk space used by all existing media objects."""
9393
raise NotImplementedError
9494

@@ -136,16 +136,17 @@ def filter_existing_files(
136136
if self.get_file_handler(obj.handle, db_handle=db_handle).file_exists()
137137
]
138138

139-
def get_media_size(self) -> int:
139+
def get_media_size(self, db_handle: Optional[DbReadBase] = None) -> int:
140140
"""Return the total disk space used by all existing media objects.
141141
142142
Only works with a request context.
143143
"""
144+
if not db_handle:
145+
db_handle = get_db_handle()
144146
if not os.path.isdir(self.base_dir):
145147
raise ValueError(f"Directory {self.base_dir} does not exist")
146148
size = 0
147149
paths_seen = set()
148-
db_handle = get_db_handle()
149150
for obj in db_handle.iter_media():
150151
path = obj.path
151152
if os.path.isabs(path):
@@ -257,9 +258,10 @@ def filter_existing_files(
257258
remote_keys = self.get_remote_keys()
258259
return [obj for obj in objects if obj.checksum in remote_keys]
259260

260-
def get_media_size(self) -> int:
261+
def get_media_size(self, db_handle: Optional[DbReadBase] = None) -> int:
261262
"""Return the total disk space used by all existing media objects."""
262-
db_handle = get_db_handle()
263+
if not db_handle:
264+
db_handle = get_db_handle()
263265
keys = set(obj.checksum for obj in db_handle.iter_media())
264266
keys_size = get_object_keys_size(
265267
bucket_name=self.bucket_name,

gramps_webapi/api/resources/export_media.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
# along with this program. If not, see <https://www.gnu.org/licenses/>.
1818
#
1919

20-
"""Endpoint for up- and downloading media files."""
20+
"""Endpoint for creating and downloading media archives."""
2121

2222
import os
2323
import re
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#
2+
# Gramps Web API - A RESTful API for the Gramps genealogy program
3+
#
4+
# Copyright (C) 2023 David Straub
5+
#
6+
# This program is free software; you can redistribute it and/or modify
7+
# it under the terms of the GNU Affero General Public License as published by
8+
# the Free Software Foundation; either version 3 of the License, or
9+
# (at your option) any later version.
10+
#
11+
# This program is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
# GNU Affero General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU Affero General Public License
17+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
18+
#
19+
20+
"""Endpoint for importing a media archive."""
21+
22+
import os
23+
import uuid
24+
import zipfile
25+
26+
from flask import Response, current_app, jsonify, request
27+
28+
from ...auth.const import PERM_IMPORT_FILE
29+
from ..auth import require_permissions
30+
from ..tasks import AsyncResult, import_media_archive, make_task_response, run_task
31+
from ..util import abort_with_message, get_tree_from_jwt
32+
from . import ProtectedResource
33+
34+
35+
class MediaUploadZipResource(ProtectedResource):
    """Resource for uploading an archive of media files."""

    def post(self) -> Response:
        """Upload an archive of media files.

        The request body is stored as a uniquely named ``.zip`` file in
        ``EXPORT_DIR``, checked to be a non-empty, readable ZIP archive and
        then passed to the ``import_media_archive`` task with ``delete=True``.

        Returns the task response when ``run_task`` hands back an
        ``AsyncResult`` (background processing), otherwise the JSON-encoded
        task result with status 201.

        Responds with status 400 (via ``abort_with_message``) when the upload
        is empty or is not a valid ZIP file; the temporary file is removed
        in that case.
        """
        require_permissions([PERM_IMPORT_FILE])
        request_stream = request.stream

        # we use EXPORT_DIR as location to store the temporary file
        export_path = current_app.config["EXPORT_DIR"]
        os.makedirs(export_path, exist_ok=True)
        file_path = os.path.join(export_path, f"{uuid.uuid4()}.zip")

        # copy the body in chunks so that a large archive is never held
        # in memory as a whole
        with open(file_path, "wb") as ftmp:
            for chunk in iter(lambda: request_stream.read(64 * 1024), b""):
                ftmp.write(chunk)

        error_message = None
        if os.path.getsize(file_path) == 0:
            error_message = "Imported file is empty"
        else:
            try:
                # opening the archive and listing its members is enough to
                # detect a file that is not a ZIP archive
                with zipfile.ZipFile(file_path) as zip_file:
                    zip_file.namelist()
            except zipfile.BadZipFile:
                error_message = "The uploaded file is not a valid ZIP file."

        if error_message is not None:
            # do not leave rejected uploads behind in EXPORT_DIR
            os.remove(file_path)
            abort_with_message(400, error_message)

        tree = get_tree_from_jwt()
        task = run_task(
            import_media_archive,
            tree=tree,
            file_name=file_path,
            delete=True,
        )
        if isinstance(task, AsyncResult):
            return make_task_response(task)
        return jsonify(task), 201

gramps_webapi/api/resources/util.py

Lines changed: 100 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323

2424
import json
2525
import os
26+
import shutil
27+
import tempfile
28+
import zipfile
2629
from hashlib import sha256
2730
from http import HTTPStatus
2831
from typing import Any, Dict, List, Optional, Tuple, Union
@@ -42,19 +45,15 @@
4245
Citation,
4346
Event,
4447
Family,
45-
GrampsType,
4648
Media,
47-
Note,
4849
Person,
4950
Place,
5051
PlaceType,
51-
Repository,
5252
Source,
5353
Span,
54-
Tag,
5554
)
5655
from gramps.gen.lib.primaryobj import BasicPrimaryObject as GrampsObject
57-
from gramps.gen.lib.serialize import from_json, to_json
56+
from gramps.gen.lib.serialize import to_json
5857
from gramps.gen.plug import BasePluginManager
5958
from gramps.gen.relationship import get_relationship_calculator
6059
from gramps.gen.soundex import soundex
@@ -70,9 +69,11 @@
7069
from gramps.gen.utils.id import create_id
7170
from gramps.gen.utils.place import conv_lat_lon
7271

72+
from ...auth import set_tree_usage
7373
from ...const import DISABLED_IMPORTERS, SEX_FEMALE, SEX_MALE, SEX_UNKNOWN
7474
from ...types import FilenameOrPath, Handle, TransactionJson
75-
from ..media import get_media_handler
75+
from ..file import get_checksum
76+
from ..media import check_quota_media, get_media_handler
7677
from ..util import abort_with_message, get_db_handle, get_tree_from_jwt
7778

7879
pd = PlaceDisplay()
@@ -1209,3 +1210,96 @@ def dry_run_import(
12091210
"notes": db_handle.get_number_of_notes(),
12101211
"tags": db_handle.get_number_of_tags(),
12111212
}
1213+
1214+
1215+
def run_import_media_archive(
    tree: str,
    db_handle: DbReadBase,
    file_name: FilenameOrPath,
    delete: bool = True,
) -> Dict[str, int]:
    """Import a media archive file.

    Files in the ZIP archive are matched by checksum against the media
    objects of the database whose files are currently missing; every match
    is uploaded through the tree's media handler, once for each media
    object that refers to that checksum.

    Args:
        tree: ID of the tree the media files belong to.
        db_handle: database used to look up the media objects.
        file_name: path of the ZIP archive to import.
        delete: if true, the ZIP archive is removed once it has been
            extracted, or when the import ends early or fails.

    Returns:
        A dictionary with the number of distinct checksums that are
        ``missing``, that were ``uploaded`` successfully, and for which at
        least one upload failed (``failures``).

    Raises:
        ValueError: if the extracted archive would not fit into the free
            disk space of the temporary directory.
    """
    temp_dir = None
    try:
        media_handler = get_media_handler(db_handle, tree=tree)

        # create a dict {checksum: [(handle1, path, mime), ...], ...}
        # of missing files
        objects = [
            db_handle.get_media_from_handle(handle)
            for handle in db_handle.get_media_handles()
        ]
        objects_existing = media_handler.filter_existing_files(
            objects, db_handle=db_handle
        )
        handles_existing = {obj.handle for obj in objects_existing}

        checksums_handles: Dict[str, List[Tuple[str, str, str]]] = {}
        for obj in objects:
            if obj.handle in handles_existing:
                continue
            obj_details = (obj.handle, obj.get_path(), obj.get_mime_type())
            checksums_handles.setdefault(obj.checksum, []).append(obj_details)

        if not checksums_handles:
            # no missing files
            return {"missing": 0, "uploaded": 0, "failures": 0}

        temp_dir = tempfile.mkdtemp()
        with zipfile.ZipFile(file_name, "r") as zip_file:
            total_size = sum(info.file_size for info in zip_file.infolist())

            # check the disk the files are extracted to, which is not
            # necessarily the one the ZIP file is stored on
            if total_size > shutil.disk_usage(temp_dir).free:
                raise ValueError("Not enough free space on disk")

            zip_file.extractall(temp_dir)

        # delete the ZIP file right away to free disk space before uploading
        if delete:
            os.remove(file_name)

        # walk extracted files and keep the first file for every checksum
        # that belongs to a missing media object
        to_upload = {}
        for root, _, files in os.walk(temp_dir):
            for name in files:
                file_path = os.path.join(root, name)
                with open(file_path, "rb") as f:
                    checksum = get_checksum(f)
                if checksum in checksums_handles and checksum not in to_upload:
                    to_upload[checksum] = (file_path, os.path.getsize(file_path))

        if not to_upload:
            # no files to upload
            return {"missing": len(checksums_handles), "uploaded": 0, "failures": 0}

        upload_size = sum(file_size for (_, file_size) in to_upload.values())
        check_quota_media(to_add=upload_size, tree=tree)

        # Failures are tracked per checksum, the same unit as ``to_upload``,
        # so that "uploaded" can never become negative when several media
        # objects share one checksum.
        failed_checksums = set()
        for checksum, (file_path, _) in to_upload.items():
            for _, media_path, mime in checksums_handles[checksum]:
                with open(file_path, "rb") as f:
                    try:
                        media_handler.upload_file(f, checksum, mime, path=media_path)
                    except Exception:  # pylint: disable=broad-except
                        # best effort: a failed upload is counted but must
                        # not abort the remaining uploads
                        failed_checksums.add(checksum)

        # update media usage
        usage_media = media_handler.get_media_size(db_handle=db_handle)
        set_tree_usage(tree, usage_media=usage_media)

        return {
            "missing": len(checksums_handles),
            "uploaded": len(to_upload) - len(failed_checksums),
            "failures": len(failed_checksums),
        }
    finally:
        # delete extracted temp files, also when the import failed
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        # delete the ZIP file if that has not happened yet
        if delete and os.path.exists(file_name):
            os.remove(file_name)

gramps_webapi/api/tasks.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
from .export import prepare_options, run_export
3434
from .media import get_media_handler
3535
from .report import run_report
36-
from .resources.util import dry_run_import, run_import
36+
from .resources.util import dry_run_import, run_import, run_import_media_archive
3737
from .util import (
3838
check_quota_people,
3939
get_config,
@@ -201,3 +201,17 @@ def export_media(tree: str, view_private: bool) -> Dict[str, Union[str, int]]:
201201
"url": f"/api/media/archive/{file_name}",
202202
"file_size": file_size,
203203
}
204+
205+
206+
@shared_task()
def import_media_archive(tree: str, file_name: str, delete: bool = True):
    """Import a media archive.

    Opens the database of ``tree`` outside of a request (read-only, with
    private records visible) and returns the summary dictionary produced by
    ``run_import_media_archive``.
    """
    return run_import_media_archive(
        tree=tree,
        db_handle=get_db_outside_request(
            tree=tree, view_private=True, readonly=True
        ),
        file_name=file_name,
        delete=delete,
    )

gramps_webapi/data/apispec.yaml

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ info:
88
99
1010
* More about Gramps and the numerous features it provides for genealogists can be found at https://gramps-project.org
11-
version: "1.0.0" # make sure to match this version with the one in _version.py
11+
version: "1.1.0" # make sure to match this version with the one in _version.py
1212
license:
1313
name: "GNU Affero General Public License v3.0"
1414
url: "http://www.gnu.org/licenses/agpl-3.0.html"
@@ -3927,6 +3927,29 @@ paths:
39273927
422:
39283928
description: "Unprocessable Entity: Invalid or bad parameter provided."
39293929

3930+
/media/archive/upload/zip:
3931+
post:
3932+
tags:
3933+
- media
3934+
summary: "Upload a zipped media file archive."
3935+
operationId: uploadMediaFileArchive
3936+
security:
3937+
- Bearer: []
3938+
responses:
3939+
201:
3940+
description: "OK: Successful operation."
3941+
202:
3942+
description: "Accepted: import will be processed in the background."
3943+
schema:
3944+
type: object
3945+
properties:
3946+
task:
3947+
$ref: "#/definitions/TaskReference"
3948+
401:
3949+
description: "Unauthorized: Missing authorization header."
3950+
403:
3951+
description: "Unauthorized: insufficient permissions."
3952+
39303953
##############################################################################
39313954
# Endpoint - Notes
39323955
##############################################################################

0 commit comments

Comments
 (0)