Skip to content

Commit 85c998f

Browse files
authored
✨ Use S3 links in internal computational backend cluster, prepare for temporary tokens (⚠️ devops) (ITISFoundation#3006)
1 parent 122703f commit 85c998f

File tree

38 files changed

+770
-115
lines changed

38 files changed

+770
-115
lines changed

api/specs/storage/openapi.yaml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,26 @@ paths:
383383
default:
384384
$ref: "#/components/responses/DefaultErrorResponse"
385385

386+
/simcore-s3:access:
387+
post:
388+
summary: Returns the temporary access credentials for the user storage space
389+
operationId: get_or_create_temporary_s3_access
390+
parameters:
391+
- name: user_id
392+
in: query
393+
required: true
394+
schema:
395+
type: integer
396+
responses:
397+
"200":
398+
description: the S3 access credentials
399+
content:
400+
application/json:
401+
schema:
402+
$ref: "#/components/schemas/S3AccessCredentialsEnveloped"
403+
default:
404+
$ref: "#/components/responses/DefaultErrorResponse"
405+
386406
/simcore-s3/files/metadata:search:
387407
post:
388408
summary: Returns metadata for all files matching a pattern
@@ -880,6 +900,35 @@ components:
880900
example:
881901
link: "example_link"
882902

903+
S3AccessCredentialsEnveloped:
904+
type: object
905+
required:
906+
- data
907+
- error
908+
properties:
909+
data:
910+
$ref: "#/components/schemas/S3AccessCredentials"
911+
error:
912+
nullable: true
913+
default: null
914+
915+
S3AccessCredentials:
916+
type: object
917+
required:
918+
- access
919+
- secret
920+
- token
921+
- endpoint
922+
properties:
923+
access:
924+
type: string
925+
secret:
926+
type: string
927+
token:
928+
type: string
929+
endpoint:
930+
type: string
931+
883932
Project:
884933
$ref: "../common/schemas/project.yaml#/components/schemas/ProjectIn"
885934

packages/dask-task-models-library/src/dask_task_models_library/container_tasks/io.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from pathlib import Path
44
from typing import Any, Dict, List, Optional, Union, cast
55

6+
from models_library.basic_regex import MIME_TYPE_RE
67
from models_library.generics import DictModel
78
from models_library.services import PROPERTY_KEY_RE
89
from pydantic import (
@@ -51,7 +52,7 @@ class Config(PortSchema.Config):
5152
},
5253
{
5354
"required": False,
54-
"url": "ftp://some_file_url",
55+
"url": "s3://another_file_url",
5556
},
5657
]
5758
}
@@ -63,15 +64,20 @@ class FileUrl(BaseModel):
6364
None,
6465
description="Local file relpath name (if given), otherwise it takes the url filename",
6566
)
67+
file_mime_type: Optional[str] = Field(
68+
None, description="the file MIME type", regex=MIME_TYPE_RE
69+
)
6670

6771
class Config:
6872
extra = Extra.forbid
6973
schema_extra = {
7074
"examples": [
75+
{"url": "https://some_file_url", "file_mime_type": "application/json"},
7176
{
7277
"url": "https://some_file_url",
78+
"file_mapping": "some_file_name.txt",
79+
"file_mime_type": "application/json",
7380
},
74-
{"url": "s3://some_file_url", "file_mapping": "some_file_name.txt"},
7581
]
7682
}
7783

@@ -98,7 +104,7 @@ class Config(DictModel.Config):
98104
"int_input": -45,
99105
"float_input": 4564.45,
100106
"string_input": "nobody thinks like a string",
101-
"file_input": {"url": "s3://some_file_url"},
107+
"file_input": {"url": "s3://thatis_file_url"},
102108
},
103109
]
104110
}

packages/models-library/src/models_library/basic_regex.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,7 @@
2525
# - cannot start with spaces, _ (we only want public) or numbers
2626
# https://docs.python.org/3/reference/lexical_analysis.html#identifiers
2727
PUBLIC_VARIABLE_NAME_RE = r"^[^_\W0-9]\w*$"
28+
29+
MIME_TYPE_RE = (
30+
r"([\w\*]*)\/(([\w\-\*]+\.)+)?([\w\-\*]+)(\+([\w\-\.]+))?(; ([\w+-\.=]+))?"
31+
)

packages/models-library/src/models_library/projects_nodes_io.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,12 @@ class BaseFileLink(BaseModel):
8888
)
8989

9090
label: Optional[str] = Field(
91-
None,
91+
default=None,
9292
description="The real file name",
9393
)
9494

9595
e_tag: Optional[str] = Field(
96-
None,
96+
default=None,
9797
description="Entity tag that uniquely represents the file. The method to generate the tag is not specified (black box).",
9898
alias="eTag",
9999
)
@@ -103,7 +103,7 @@ class SimCoreFileLink(BaseFileLink):
103103
"""I/O port type to hold a link to a file in simcore S3 storage"""
104104

105105
dataset: Optional[str] = Field(
106-
None,
106+
default=None,
107107
deprecated=True
108108
# TODO: Remove with storage refactoring
109109
)

packages/pytest-simcore/src/pytest_simcore/services_api_mocks_for_aiohttp_clients.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,10 +334,13 @@ async def storage_v0_service_mock(
334334

335335
def get_download_link_cb(url: URL, **kwargs) -> CallbackResult:
336336
file_id = url.path.rsplit("/files/")[1]
337-
337+
assert "params" in kwargs
338+
assert "link_type" in kwargs["params"]
339+
link_type = kwargs["params"]["link_type"]
340+
scheme = {"presigned": "http", "s3": "s3"}
338341
return CallbackResult(
339342
status=web.HTTPOk.status_code,
340-
payload={"data": {"link": f"file://{file_id}"}},
343+
payload={"data": {"link": f"{scheme[link_type]}://{file_id}"}},
341344
)
342345

343346
get_file_metadata_pattern = re.compile(

packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,29 +37,31 @@ async def _get_download_link(
3737
store_id: str,
3838
file_id: str,
3939
session: ClientSession,
40+
link_type: storage_client.LinkType,
4041
) -> URL:
41-
presigned_link: AnyUrl = await storage_client.get_download_file_presigned_link(
42-
session, file_id, store_id, user_id
42+
link: AnyUrl = await storage_client.get_download_file_link(
43+
session, file_id, store_id, user_id, link_type
4344
)
44-
if not presigned_link:
45+
if not link:
4546
raise exceptions.S3InvalidPathError(file_id)
4647

47-
return URL(presigned_link)
48+
return URL(link)
4849

4950

5051
async def _get_upload_link(
5152
user_id: int,
5253
store_id: str,
5354
file_id: str,
5455
session: ClientSession,
56+
link_type: storage_client.LinkType,
5557
) -> URL:
56-
presigned_link: AnyUrl = await storage_client.get_upload_file_presigned_link(
57-
session, file_id, store_id, user_id
58+
link: AnyUrl = await storage_client.get_upload_file_link(
59+
session, file_id, store_id, user_id, link_type
5860
)
59-
if not presigned_link:
61+
if not link:
6062
raise exceptions.S3InvalidPathError(file_id)
6163

62-
return URL(presigned_link)
64+
return URL(link)
6365

6466

6567
async def _download_link_to_file(session: ClientSession, url: URL, file_path: Path):
@@ -140,6 +142,7 @@ async def get_download_link_from_s3(
140142
store_name: Optional[str],
141143
store_id: Optional[str],
142144
s3_object: str,
145+
link_type: storage_client.LinkType,
143146
client_session: Optional[ClientSession] = None,
144147
) -> Optional[URL]:
145148
if store_name is None and store_id is None:
@@ -151,7 +154,9 @@ async def get_download_link_from_s3(
151154
user_id, store_name, session
152155
)
153156
assert store_id is not None # nosec
154-
return await _get_download_link(user_id, store_id, s3_object, session)
157+
return await _get_download_link(
158+
user_id, store_id, s3_object, session, link_type
159+
)
155160

156161

157162
async def get_upload_link_from_s3(
@@ -160,6 +165,7 @@ async def get_upload_link_from_s3(
160165
store_name: Optional[str],
161166
store_id: Optional[str],
162167
s3_object: str,
168+
link_type: storage_client.LinkType,
163169
client_session: Optional[ClientSession] = None,
164170
) -> Tuple[str, URL]:
165171
if store_name is None and store_id is None:
@@ -173,7 +179,7 @@ async def get_upload_link_from_s3(
173179
assert store_id is not None # nosec
174180
return (
175181
store_id,
176-
await _get_upload_link(user_id, store_id, s3_object, session),
182+
await _get_upload_link(user_id, store_id, s3_object, session, link_type),
177183
)
178184

179185

@@ -210,6 +216,7 @@ async def download_file_from_s3(
210216
store_id=store_id,
211217
s3_object=s3_object,
212218
client_session=session,
219+
link_type=storage_client.LinkType.PRESIGNED,
213220
)
214221

215222
# the link contains the file name
@@ -274,6 +281,7 @@ async def upload_file(
274281
store_id=store_id,
275282
s3_object=s3_object,
276283
client_session=session,
284+
link_type=storage_client.LinkType.PRESIGNED,
277285
)
278286

279287
if not upload_link:

packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from enum import Enum
12
from functools import wraps
23
from json import JSONDecodeError
34
from typing import Any, Callable, Dict
@@ -61,9 +62,18 @@ async def get_storage_locations(
6162
return locations_enveloped.data
6263

6364

65+
class LinkType(str, Enum):
66+
PRESIGNED = "presigned"
67+
S3 = "s3"
68+
69+
6470
@handle_client_exception
65-
async def get_download_file_presigned_link(
66-
session: ClientSession, file_id: str, location_id: str, user_id: UserID
71+
async def get_download_file_link(
72+
session: ClientSession,
73+
file_id: str,
74+
location_id: str,
75+
user_id: UserID,
76+
link_type: LinkType,
6777
) -> AnyUrl:
6878
if (
6979
not isinstance(file_id, str)
@@ -80,7 +90,7 @@ async def get_download_file_presigned_link(
8090

8191
async with session.get(
8292
f"{_base_url()}/locations/{location_id}/files/{quote(file_id, safe='')}",
83-
params={"user_id": f"{user_id}"},
93+
params={"user_id": f"{user_id}", "link_type": link_type.value},
8494
) as response:
8595
response.raise_for_status()
8696

@@ -93,8 +103,12 @@ async def get_download_file_presigned_link(
93103

94104

95105
@handle_client_exception
96-
async def get_upload_file_presigned_link(
97-
session: ClientSession, file_id: str, location_id: str, user_id: UserID
106+
async def get_upload_file_link(
107+
session: ClientSession,
108+
file_id: str,
109+
location_id: str,
110+
user_id: UserID,
111+
link_type: LinkType,
98112
) -> AnyUrl:
99113
if (
100114
not isinstance(file_id, str)
@@ -110,7 +124,7 @@ async def get_upload_file_presigned_link(
110124
)
111125
async with session.put(
112126
f"{_base_url()}/locations/{location_id}/files/{quote(file_id, safe='')}",
113-
params={"user_id": f"{user_id}"},
127+
params={"user_id": f"{user_id}", "link_type": link_type.value},
114128
) as response:
115129
response.raise_for_status()
116130

packages/simcore-sdk/src/simcore_sdk/node_ports_v2/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from ..node_ports_common import config as node_config
55
from ..node_ports_common import exceptions
66
from ..node_ports_common.dbmanager import DBManager
7+
from ..node_ports_common.storage_client import LinkType as FileLinkType
78
from .nodeports_v2 import Nodeports
89
from .port import Port
910
from .serialization_v2 import load
@@ -33,4 +34,4 @@ async def ports(
3334
)
3435

3536

36-
__all__ = ["ports", "node_config", "exceptions", "Port"]
37+
__all__ = ("ports", "node_config", "exceptions", "Port", "FileLinkType")

packages/simcore-sdk/src/simcore_sdk/node_ports_v2/nodeports_v2.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from pydantic import BaseModel, Field
77
from servicelib.utils import logged_gather
8+
from simcore_sdk.node_ports_common.storage_client import LinkType
89

910
from ..node_ports_common.dbmanager import DBManager
1011
from ..node_ports_common.exceptions import PortNotFound, UnboundPortError
@@ -55,14 +56,20 @@ async def outputs(self) -> OutputsList:
5556
await self._auto_update_from_db()
5657
return self.internal_outputs
5758

58-
async def get_value_link(self, item_key: str) -> Optional[ItemValue]:
59+
async def get_value_link(
60+
self, item_key: str, *, file_link_type: LinkType
61+
) -> Optional[ItemValue]:
5962
try:
60-
return await (await self.inputs)[item_key].get_value()
63+
return await (await self.inputs)[item_key].get_value(
64+
file_link_type=file_link_type
65+
)
6166
except UnboundPortError:
6267
# not available try outputs
6368
pass
6469
# if this fails it will raise an exception
65-
return await (await self.outputs)[item_key].get_value()
70+
return await (await self.outputs)[item_key].get_value(
71+
file_link_type=file_link_type
72+
)
6673

6774
async def get(self, item_key: str) -> Optional[ItemConcreteValue]:
6875
try:

packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import jsonschema
88
from models_library.services import PROPERTY_KEY_RE, BaseServiceIOModel
99
from pydantic import AnyUrl, Field, PrivateAttr, validator
10+
from simcore_sdk.node_ports_common.storage_client import LinkType
1011

1112
from ..node_ports_common.exceptions import (
1213
AbsoluteSymlinkIsNotUploadableException,
@@ -125,13 +126,18 @@ def __init__(self, **data: Any):
125126
assert self._py_value_type # nosec
126127
assert self._py_value_converter # nosec
127128

128-
async def get_value(self) -> Optional[ItemValue]:
129+
async def get_value(
130+
self, *, file_link_type: Optional[LinkType] = None
131+
) -> Optional[ItemValue]:
129132
"""returns the value of the link after resolving the port links"""
133+
if not file_link_type:
134+
file_link_type = LinkType.PRESIGNED
130135
log.debug(
131-
"getting value of %s[%s] containing '%s'",
136+
"getting value of %s[%s] containing '%s' using %s",
132137
self.key,
133138
self.property_type,
134139
pformat(self.value),
140+
file_link_type,
135141
)
136142

137143
if isinstance(self.value, PortLink):
@@ -140,12 +146,14 @@ async def get_value(self) -> Optional[ItemValue]:
140146
# pylint: disable=protected-access
141147
self.value,
142148
self._node_ports._node_ports_creator_cb,
149+
file_link_type=file_link_type,
143150
)
144151
if isinstance(self.value, FileLink):
145152
# let's get the download/upload link from storage
146153
return await port_utils.get_download_link_from_storage(
147154
user_id=self._node_ports.user_id,
148155
value=self.value,
156+
link_type=file_link_type,
149157
)
150158

151159
if isinstance(self.value, DownloadLink):

0 commit comments

Comments
 (0)