Skip to content

Commit 97cd806

Browse files
committed
First effort to allow uploading directly into a program job
1 parent 3ca72d7 commit 97cd806

File tree

6 files changed

+234
-118
lines changed

6 files changed

+234
-118
lines changed

services/api-server/src/simcore_service_api_server/api/routes/files.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,15 @@
3232
from starlette.responses import RedirectResponse
3333

3434
from ...exceptions.service_errors_utils import DEFAULT_BACKEND_SERVICE_STATUS_CODES
35+
from ...models.domain.files import File as DomainFile
3536
from ...models.pagination import Page, PaginationParams
3637
from ...models.schemas.errors import ErrorGet
3738
from ...models.schemas.files import (
3839
ClientFile,
3940
ClientFileUploadData,
40-
File,
41+
)
42+
from ...models.schemas.files import File as OutputFile
43+
from ...models.schemas.files import (
4144
FileUploadData,
4245
UploadLinks,
4346
)
@@ -70,14 +73,14 @@ async def _get_file(
7073
file_id: UUID,
7174
storage_client: StorageApi,
7275
user_id: int,
73-
):
76+
) -> DomainFile:
7477
"""Gets metadata for a given file resource"""
7578

7679
try:
77-
stored_files: list[
78-
StorageFileMetaData
79-
] = await storage_client.search_owned_files(
80-
user_id=user_id, file_id=file_id, limit=1
80+
stored_files: list[StorageFileMetaData] = (
81+
await storage_client.search_owned_files(
82+
user_id=user_id, file_id=file_id, limit=1
83+
)
8184
)
8285
if not stored_files:
8386
msg = "Not found in storage"
@@ -98,7 +101,7 @@ async def _get_file(
98101
) from err
99102

100103

101-
@router.get("", response_model=list[File], responses=_FILE_STATUS_CODES)
104+
@router.get("", response_model=list[OutputFile], responses=_FILE_STATUS_CODES)
102105
async def list_files(
103106
storage_client: Annotated[StorageApi, Depends(get_api_client(StorageApi))],
104107
user_id: Annotated[int, Depends(get_current_user_id)],
@@ -113,12 +116,12 @@ async def list_files(
113116
)
114117

115118
# Adapts storage API model to API model
116-
all_files: list[File] = []
119+
all_files: list[OutputFile] = []
117120
for stored_file_meta in stored_files:
118121
try:
119122
assert stored_file_meta.file_id # nosec
120123

121-
file_meta: File = to_file_api_model(stored_file_meta)
124+
file_meta = to_file_api_model(stored_file_meta)
122125

123126
except (ValidationError, ValueError, AttributeError) as err:
124127
_logger.warning(
@@ -129,14 +132,14 @@ async def list_files(
129132
)
130133

131134
else:
132-
all_files.append(file_meta)
135+
all_files.append(OutputFile.from_domain_model(file_meta))
133136

134137
return all_files
135138

136139

137140
@router.get(
138141
"/page",
139-
response_model=Page[File],
142+
response_model=Page[OutputFile],
140143
include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
141144
status_code=status.HTTP_501_NOT_IMPLEMENTED,
142145
)
@@ -161,7 +164,7 @@ def _get_spooled_file_size(file_io: IO) -> int:
161164

162165
@router.put(
163166
"/content",
164-
response_model=File,
167+
response_model=OutputFile,
165168
responses=_FILE_STATUS_CODES,
166169
)
167170
@cancel_on_disconnect
@@ -187,7 +190,7 @@ async def upload_file(
187190
None, _get_spooled_file_size, file.file
188191
)
189192
# assign file_id.
190-
file_meta: File = await File.create_from_uploaded(
193+
file_meta = await DomainFile.create_from_uploaded(
191194
file,
192195
file_size=file_size,
193196
created_at=datetime.datetime.now(datetime.UTC).isoformat(),
@@ -216,7 +219,7 @@ async def upload_file(
216219
assert isinstance(upload_result, UploadedFile) # nosec
217220

218221
file_meta.e_tag = upload_result.etag
219-
return file_meta
222+
return OutputFile.from_domain_model(file_meta)
220223

221224

222225
# NOTE: MaG suggested a single function that can upload one or multiple files instead of having
@@ -244,7 +247,7 @@ async def get_upload_links(
244247
):
245248
"""Get upload links for uploading a file to storage"""
246249
assert request # nosec
247-
file_meta: File = await File.create_from_client_file(
250+
file_meta = await DomainFile.create_from_client_file(
248251
client_file,
249252
datetime.datetime.now(datetime.UTC).isoformat(),
250253
)
@@ -275,7 +278,7 @@ async def get_upload_links(
275278

276279
@router.get(
277280
"/{file_id}",
278-
response_model=File,
281+
response_model=OutputFile,
279282
responses=_FILE_STATUS_CODES,
280283
)
281284
async def get_file(
@@ -294,7 +297,7 @@ async def get_file(
294297

295298
@router.get(
296299
":search",
297-
response_model=Page[File],
300+
response_model=Page[OutputFile],
298301
responses=_FILE_STATUS_CODES,
299302
)
300303
async def search_files_page(
@@ -317,8 +320,11 @@ async def search_files_page(
317320
raise HTTPException(
318321
status_code=status.HTTP_404_NOT_FOUND, detail="Not found in storage"
319322
)
323+
file_list = [
324+
OutputFile.from_domain_model(to_file_api_model(fmd)) for fmd in stored_files
325+
]
320326
return create_page(
321-
[to_file_api_model(fmd) for fmd in stored_files],
327+
file_list,
322328
total=len(stored_files),
323329
params=page_params,
324330
)
@@ -333,7 +339,7 @@ async def delete_file(
333339
user_id: Annotated[int, Depends(get_current_user_id)],
334340
storage_client: Annotated[StorageApi, Depends(get_api_client(StorageApi))],
335341
):
336-
file: File = await _get_file(
342+
file = await _get_file(
337343
file_id=file_id,
338344
storage_client=storage_client,
339345
user_id=user_id,
@@ -356,7 +362,7 @@ async def abort_multipart_upload(
356362
):
357363
assert request # nosec
358364
assert user_id # nosec
359-
file: File = File(
365+
file = DomainFile(
360366
id=file_id,
361367
filename=client_file.filename,
362368
checksum=client_file.sha256_checksum,
@@ -372,7 +378,7 @@ async def abort_multipart_upload(
372378

373379
@router.post(
374380
"/{file_id}:complete",
375-
response_model=File,
381+
response_model=OutputFile,
376382
responses=_FILE_STATUS_CODES,
377383
)
378384
@cancel_on_disconnect
@@ -387,7 +393,7 @@ async def complete_multipart_upload(
387393
assert request # nosec
388394
assert user_id # nosec
389395

390-
file: File = File(
396+
file = DomainFile(
391397
id=file_id,
392398
filename=client_file.filename,
393399
checksum=client_file.sha256_checksum,
@@ -429,7 +435,7 @@ async def download_file(
429435
):
430436
# NOTE: application/octet-stream is defined as "arbitrary binary data" in RFC 2046,
431437
# gets meta
432-
file_meta: File = await get_file(file_id, storage_client, user_id)
438+
file_meta = await get_file(file_id, storage_client, user_id)
433439

434440
# download from S3 using pre-signed link
435441
presigned_download_link = await storage_client.get_download_link(
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
from mimetypes import guess_type
2+
from pathlib import Path
3+
from urllib.parse import quote as _quote
4+
from urllib.parse import unquote as _unquote
5+
from uuid import UUID, uuid3
6+
7+
import aiofiles
8+
from fastapi import UploadFile
9+
from models_library.api_schemas_storage.storage_schemas import ETag
10+
from models_library.basic_types import SHA256Str
11+
from models_library.projects_nodes_io import StorageFileID
12+
from pydantic import (
13+
BaseModel,
14+
ConfigDict,
15+
Field,
16+
TypeAdapter,
17+
ValidationInfo,
18+
field_validator,
19+
)
20+
from servicelib.file_utils import create_sha256_checksum
21+
from simcore_service_api_server.models.schemas.files import (
22+
NAMESPACE_FILEID_KEY,
23+
ClientFile,
24+
)
25+
26+
27+
class File(BaseModel):
    """Represents a file stored on the server side i.e. a unique reference to a file in the cloud.

    Domain model: the public API exposes a separate output schema; this class is the
    internal representation used by the api-server routes and services.
    """

    # WARNING: from pydantic import File as FileParam
    # NOTE: see https://ant.apache.org/manual/Tasks/checksum.html

    # Deterministic resource id, derived via uuid3 from content/metadata (see create_id)
    id: UUID = Field(..., description="Resource identifier")

    filename: str = Field(..., description="Name of the file with extension")
    # validate_default=True forces the before-validator below to run even when
    # content_type is omitted, so it can be guessed from the filename
    content_type: str | None = Field(
        default=None,
        description="Guess of type content [EXPERIMENTAL]",
        validate_default=True,
    )
    sha256_checksum: SHA256Str | None = Field(
        default=None,
        description="SHA256 hash of the file's content",
        alias="checksum",  # alias for backwards compatibility
    )
    e_tag: ETag | None = Field(default=None, description="S3 entity tag")

    model_config = ConfigDict(
        populate_by_name=True,
        json_schema_extra={
            "examples": [
                # complete
                {
                    "id": "f0e1fb11-208d-3ed2-b5ef-cab7a7398f78",
                    "filename": "Architecture-of-Scalable-Distributed-ETL-System-whitepaper.pdf",
                    "content_type": "application/pdf",
                    "checksum": "1a512547e3ce3427482da14e8c914ecf61da76ad5f749ff532efe906e6bba128",
                },
                # minimum
                {
                    "id": "f0e1fb11-208d-3ed2-b5ef-cab7a7398f78",
                    "filename": "whitepaper.pdf",
                },
            ]
        },
    )

    @field_validator("content_type", mode="before")
    @classmethod
    def guess_content_type(cls, v, info: ValidationInfo):
        """If content_type was not provided, guess it from the filename's extension.

        Returns None when the type cannot be guessed (guess_type's miss value).
        """
        if v is None:
            filename = info.data.get("filename")
            if filename:
                mime_content_type, _ = guess_type(filename, strict=False)
                return mime_content_type
        return v

    @classmethod
    async def create_from_path(cls, path: Path) -> "File":
        """Build a File from a local path, hashing its content for id and checksum."""
        async with aiofiles.open(path, mode="rb") as file:
            sha256check = await create_sha256_checksum(file)

        return cls(
            id=cls.create_id(sha256check, path.name),
            filename=path.name,
            checksum=SHA256Str(sha256check),
        )

    @classmethod
    async def create_from_file_link(cls, s3_object_path: str, e_tag: str) -> "File":
        """Build a File from an S3 object path, using the e-tag (not content) for the id."""
        filename = Path(s3_object_path).name
        return cls(
            id=cls.create_id(e_tag, filename),
            filename=filename,
            e_tag=e_tag,
        )

    @classmethod
    async def create_from_uploaded(
        cls, file: UploadFile, *, file_size=None, created_at=None
    ) -> "File":
        """Build a File from a FastAPI UploadFile, hashing the stream for the checksum.

        The id mixes checksum (or file_size as fallback), filename and created_at.
        """
        sha256check = await create_sha256_checksum(file)
        # WARNING: UploadFile wraps a stream and will check its cursor position: file.file.tell() != 0
        # WARNING: await file.seek(0) might introduce race condition if not done carefully

        return cls(
            id=cls.create_id(sha256check or file_size, file.filename, created_at),
            filename=file.filename or "Undefined",
            content_type=file.content_type,
            checksum=SHA256Str(sha256check),
        )

    @classmethod
    async def create_from_client_file(
        cls,
        client_file: ClientFile,
        created_at: str,
    ) -> "File":
        """Build a File from client-supplied metadata (no content available to hash)."""
        return cls(
            id=cls.create_id(client_file.filesize, client_file.filename, created_at),
            filename=client_file.filename,
            checksum=client_file.sha256_checksum,
        )

    @classmethod
    async def create_from_quoted_storage_id(cls, quoted_storage_id: str) -> "File":
        """Build a File by parsing a URL-quoted StorageFileID of the form api/{uuid}/{name}.

        NOTE(review): assumes exactly three path parts; raises ValueError otherwise — confirm
        callers only pass api-namespace ids.
        """
        storage_file_id: StorageFileID = TypeAdapter(StorageFileID).validate_python(
            _unquote(quoted_storage_id)
        )
        _, fid, fname = Path(storage_file_id).parts
        return cls(id=UUID(fid), filename=fname, checksum=None)

    @classmethod
    def create_id(cls, *keys) -> UUID:
        """Deterministic uuid3 over the colon-joined string forms of *keys*."""
        return uuid3(NAMESPACE_FILEID_KEY, ":".join(map(str, keys)))

    @property
    def storage_file_id(self) -> StorageFileID:
        """Get the StorageFileId associated with this file"""
        return TypeAdapter(StorageFileID).validate_python(
            f"api/{self.id}/{self.filename}"
        )

    @property
    def quoted_storage_file_id(self) -> str:
        """Quoted version of the StorageFileId"""
        return _quote(self.storage_file_id, safe="")
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from models_library.utils.change_case import camel_to_snake
2+
from pydantic import BaseModel, ConfigDict
3+
4+
5+
class ApiServerOutputSchema(BaseModel):
    """Base class for all models returned by the public api-server REST API.

    Frozen (immutable) and prunes extra fields so internal/domain data can be
    converted to the public schema without leaking internal-only fields.
    """

    # NOTE(review): alias_generator=camel_to_snake is a no-op on already-snake_case
    # field names — verify whether snake_to_camel was intended for the wire format.
    model_config = ConfigDict(
        alias_generator=camel_to_snake,
        populate_by_name=True,
        extra="ignore",  # Used to prune extra fields from internal data
        frozen=True,
    )
12+
13+
14+
class ApiServerInputSchema(BaseModel):
    """Base class for all models accepted as input by the public api-server REST API.

    Frozen (immutable) and ignores unexpected fields from clients.
    """

    # NOTE(review): alias_generator=camel_to_snake is a no-op on already-snake_case
    # field names — verify whether snake_to_camel was intended for the wire format.
    model_config = ConfigDict(
        alias_generator=camel_to_snake,
        populate_by_name=True,
        extra="ignore",  # Used to prune extra fields from internal data
        frozen=True,
    )

0 commit comments

Comments
 (0)