Skip to content

Commit 9fddab5

Browse files
author
Jens Kürten
committed
file upload service
1 parent 65fbfc2 commit 9fddab5

File tree

7 files changed

+284
-15
lines changed

7 files changed

+284
-15
lines changed

csfunctions/handler.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,7 @@ def execute(function_name: str, request_body: str, function_dir: str = "src") ->
9696
link_objects(request.event)
9797

9898
function_callback = get_function_callable(function_name, function_dir)
99-
service = Service(
100-
str(request.metadata.service_url) if request.metadata.service_url else None, request.metadata.service_token
101-
)
99+
service = Service(metadata=request.metadata)
102100

103101
response = function_callback(request.metadata, request.event, service)
104102

csfunctions/service/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from csfunctions.metadata import MetaData
2+
from csfunctions.service.file_upload import FileUploadService
13
from csfunctions.service.numgen import NumberGeneratorService
24

35

@@ -6,5 +8,6 @@ class Service:
68
Provides access to services on the elements instance, e.g. generating numbers.
79
"""
810

9-
def __init__(self, service_url: str | None, service_token: str | None):
10-
self.generator = NumberGeneratorService(service_url, service_token)
11+
def __init__(self, metadata: MetaData):
12+
self.generator = NumberGeneratorService(metadata=metadata)
13+
self.file_upload = FileUploadService(metadata=metadata)

csfunctions/service/base.py

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,36 +2,58 @@
22

33
import requests
44

5+
from csfunctions.metadata import MetaData
6+
57

68
class Unauthorized(Exception):
79
pass
810

911

12+
class Conflict(Exception):
13+
pass
14+
15+
16+
class NotFound(Exception):
17+
pass
18+
19+
20+
class UnprocessableEntity(Exception):
21+
pass
22+
23+
1024
class BaseService:
1125
"""
1226
Base class for services.
1327
"""
1428

15-
def __init__(self, service_url: str | None, service_token: str | None):
16-
self.service_url = service_url
17-
self.service_token = service_token
29+
def __init__(self, metadata: MetaData):
30+
# Store full metadata for services that need additional fields (e.g. app_user)
31+
self.metadata = metadata
1832

19-
def request(self, endpoint: str, method: str = "GET", params: Optional[dict] = None) -> dict | list:
33+
def request(
34+
self, endpoint: str, method: str = "GET", params: Optional[dict] = None, json: Optional[dict] = None
35+
) -> dict | list:
2036
"""
2137
Make a request to the access service.
2238
"""
23-
if self.service_url is None:
39+
if self.metadata.service_url is None:
2440
raise ValueError("No service url given.")
25-
if self.service_token is None:
41+
if self.metadata.service_token is None:
2642
raise ValueError("No service token given.")
2743

28-
headers = {"Authorization": f"Bearer {self.service_token}"}
44+
headers = {"Authorization": f"Bearer {self.metadata.service_token}"}
2945
params = params or {}
30-
url = self.service_url.rstrip("/") + "/" + endpoint.lstrip("/")
31-
response = requests.request(method, url=url, params=params, headers=headers, timeout=10)
46+
url = str(self.metadata.service_url).rstrip("/") + "/" + endpoint.lstrip("/")
47+
response = requests.request(method, url=url, params=params, headers=headers, timeout=10, json=json)
3248

3349
if response.status_code == 401:
3450
raise Unauthorized
51+
elif response.status_code == 409:
52+
raise Conflict
53+
elif response.status_code == 404:
54+
raise NotFound
55+
elif response.status_code == 422:
56+
raise UnprocessableEntity(response.text)
3557
if response.status_code == 200:
3658
return response.json()
3759
else:

csfunctions/service/file_upload.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
import hashlib
2+
from copy import deepcopy
3+
from random import choice
4+
from string import ascii_letters
5+
from typing import BinaryIO
6+
7+
import requests
8+
9+
from csfunctions.service.base import BaseService
10+
from csfunctions.service.file_upload_schemas import (
11+
CompleteFileUploadRequest,
12+
CreateNewFileRequest,
13+
CreateNewFileResponse,
14+
GeneratePresignedUrlRequest,
15+
PresignedWriteUrls,
16+
)
17+
18+
19+
def _generate_lock_id():
20+
return "".join(choice(ascii_letters) for i in range(12)) # nosec
21+
22+
23+
class FileUploadService(BaseService):
24+
def _create_new_file(self, filename: str, parent_object_id: str, persno: str, check_access: bool = True) -> str:
25+
"""Creates a new (empty) file attached to the parent object and returns the cdb_object_id."""
26+
response_json = self.request(
27+
endpoint="/file_upload/create",
28+
method="POST",
29+
json=CreateNewFileRequest(
30+
parent_object_id=parent_object_id, filename=filename, persno=persno, check_access=check_access
31+
).model_dump(),
32+
)
33+
data = CreateNewFileResponse.model_validate(response_json)
34+
return data.file_object_id
35+
36+
def _get_presigned_write_urls(
37+
self, file_object_id: str, filesize: int, lock_id: str, persno: str, check_access: bool = True
38+
) -> PresignedWriteUrls:
39+
response_json = self.request(
40+
endpoint=f"/file_upload/{file_object_id}/generate_presigned_url",
41+
method="POST",
42+
json=GeneratePresignedUrlRequest(
43+
check_access=check_access, persno=persno, filesize=filesize, lock_id=lock_id
44+
).model_dump(),
45+
)
46+
47+
return PresignedWriteUrls.model_validate(response_json)
48+
49+
def _upload_file_content(
50+
self, presigned_urls: PresignedWriteUrls, stream: BinaryIO
51+
) -> tuple[PresignedWriteUrls, str]:
52+
"""Upload file stream in chunks using presigned URLs and return updated context + sha256 hash."""
53+
etags: list[str] = []
54+
sha256 = hashlib.sha256()
55+
for url in presigned_urls.urls:
56+
data: bytes = stream.read(presigned_urls.chunksize)
57+
sha256.update(data)
58+
resp = requests.put(url, data=data, headers=presigned_urls.headers, timeout=20)
59+
# 20 second timeout to stay below 30s max execution time of the Function
60+
# otherwise we won't get a proper error message in the logs
61+
resp.raise_for_status()
62+
etag = resp.headers.get("ETag")
63+
if etag:
64+
etags.append(etag)
65+
updated = deepcopy(presigned_urls)
66+
if etags:
67+
updated.etags = etags
68+
return updated, sha256.hexdigest()
69+
70+
@staticmethod
71+
def _get_stream_size(stream: BinaryIO) -> int:
72+
if not stream.seekable():
73+
raise ValueError("Stream is not seekable; size cannot be determined.")
74+
current_pos = stream.tell()
75+
stream.seek(0, 2)
76+
size = stream.tell()
77+
stream.seek(current_pos)
78+
return size
79+
80+
def _complete_upload(
81+
self,
82+
file_object_id: str,
83+
filesize: int,
84+
lock_id: str,
85+
presigned_urls: PresignedWriteUrls,
86+
persno: str,
87+
check_access: bool = True,
88+
sha256: str | None = None,
89+
delete_derived_files: bool = True,
90+
) -> None:
91+
self.request(
92+
endpoint=f"/file_upload/{file_object_id}/complete",
93+
method="POST",
94+
json=CompleteFileUploadRequest(
95+
filesize=filesize,
96+
check_access=check_access,
97+
persno=persno,
98+
presigned_write_urls=presigned_urls,
99+
lock_id=lock_id,
100+
sha256=sha256,
101+
delete_derived_files=delete_derived_files,
102+
).model_dump(),
103+
)
104+
105+
def upload_file_content(
106+
self,
107+
file_object_id: str,
108+
stream: BinaryIO,
109+
persno: str | None = None,
110+
check_access: bool = True,
111+
filesize: int | None = None,
112+
delete_derived_files: bool = True,
113+
) -> None:
114+
persno = persno or self.metadata.app_user
115+
if filesize is None:
116+
filesize = self._get_stream_size(stream)
117+
lock_id = _generate_lock_id()
118+
presigned = self._get_presigned_write_urls(
119+
file_object_id=file_object_id,
120+
filesize=filesize,
121+
lock_id=lock_id,
122+
persno=persno,
123+
check_access=check_access,
124+
)
125+
presigned_with_etags, sha256 = self._upload_file_content(presigned_urls=presigned, stream=stream)
126+
self._complete_upload(
127+
file_object_id=file_object_id,
128+
filesize=filesize,
129+
lock_id=lock_id,
130+
presigned_urls=presigned_with_etags,
131+
persno=persno,
132+
check_access=check_access,
133+
sha256=sha256,
134+
delete_derived_files=delete_derived_files,
135+
)
136+
137+
def upload_new_file(
138+
self,
139+
parent_object_id: str,
140+
filename: str,
141+
stream: BinaryIO,
142+
persno: str | None = None,
143+
check_access: bool = True,
144+
filesize: int | None = None,
145+
) -> str:
146+
persno = persno or self.metadata.app_user
147+
file_object_id = self._create_new_file(
148+
filename=filename,
149+
parent_object_id=parent_object_id,
150+
persno=persno,
151+
check_access=check_access,
152+
)
153+
self.upload_file_content(
154+
file_object_id=file_object_id,
155+
stream=stream,
156+
persno=persno,
157+
check_access=check_access,
158+
filesize=filesize,
159+
delete_derived_files=False,
160+
)
161+
return file_object_id
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
from typing import Optional
2+
3+
from pydantic import BaseModel, Field
4+
5+
6+
class PresignedWriteUrls(BaseModel):
7+
"""
8+
Context object for managing upload information.
9+
"""
10+
11+
blob_id: str
12+
urls: list[str]
13+
chunksize: int
14+
upload_id: Optional[str] = None
15+
etags: Optional[list[str]] = None
16+
block_ids: Optional[list[str]] = None
17+
headers: Optional[dict[str, str]] = None
18+
metadata: Optional[dict[str, str]] = None
19+
signature: Optional[str] = None
20+
21+
22+
class GeneratePresignedUrlRequest(BaseModel):
23+
"""
24+
Response model for generating presigned URLs.
25+
"""
26+
27+
filesize: int = Field(..., description="The size of the file you want to upload in bytes.", ge=0)
28+
check_access: bool = Field(..., description="Whether to check access permissions.")
29+
persno: str = Field(..., description="The persno of the user who is uploading the file.")
30+
lock_id: str = Field(..., description="Provide some random string to lock the file for upload.")
31+
32+
33+
class CompleteFileUploadRequest(BaseModel):
34+
"""
35+
Request model for completing a file upload.
36+
"""
37+
38+
filesize: int = Field(..., description="The size of the file you want to upload in bytes.", ge=0)
39+
check_access: bool = Field(..., description="Whether to check access permissions.")
40+
persno: str = Field(..., description="The persno of the user who is uploading the file.")
41+
presigned_write_urls: PresignedWriteUrls = Field(..., description="The presigned write URLs for the file upload.")
42+
sha256: Optional[str] = Field(None, description="The SHA256 hash of the file content.")
43+
lock_id: str = Field(..., description="The lock ID the file was locked with")
44+
delete_derived_files: bool = Field(True, description="Whether to delete derived files (e.g. converted pdfs).")
45+
46+
47+
class AbortFileUploadRequest(BaseModel):
48+
"""
49+
Request model for aborting a file upload.
50+
"""
51+
52+
presigned_write_urls: PresignedWriteUrls = Field(..., description="The presigned write URLs for the file upload.")
53+
lock_id: str = Field(..., description="The lock ID the file was locked with")
54+
persno: str = Field(..., description="The persno of the user who is uploading the file.")
55+
56+
57+
class CreateNewFileRequest(BaseModel):
58+
"""
59+
Request model for creating a new file.
60+
"""
61+
62+
parent_object_id: str = Field(..., description="cdb_object_id of the object the file should be attached to.")
63+
filename: str = Field(..., description="The name of the file to create.")
64+
persno: str = Field(..., description="The persno of the user creating the file.")
65+
check_access: bool = Field(..., description="Whether to check access permissions.")
66+
67+
68+
class CreateNewFileResponse(BaseModel):
69+
"""
70+
Response model for creating a new file.
71+
"""
72+
73+
file_object_id: str = Field(..., description="The cdb_object_id of the newly created file.")

poetry.lock

Lines changed: 12 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ mkdocs = "^1.6.1"
3434
mkdocs-material = "^9.6.14"
3535
mkdocs-link-marker = "^0.1.3"
3636
types-requests = "^2.32.4.20250809"
37+
types-pyyaml = "^6.0.12.20250822"
3738

3839
[tool.ruff]
3940
line-length = 120

0 commit comments

Comments
 (0)