Skip to content

Commit 4fefd8f

Browse files
authored
Is1865/fix download outputs (#2275)
Outputs produced by a project cannot be retrieved as a file resource in the public API because their naming is incompatible. This PR overcomes the problem by creating a soft link to each output file with a name that is compatible with the public API. *api-server — ADDED: creates soft links to output files named api/{file_id}/{file_name}; CHANGED: extends testing. *storage — ADDED: new entry point to create a soft link from an existing file, POST /files/{file_id}:soft-copy; CHANGED: new column file_meta_data.is_soft_link: bool; CHANGED: refactors routing; CHANGED: extends testing. *simcore-sdk — CHANGED: improved test fixture store_link to upload properly, i.e. via the storage service.
1 parent 691bd88 commit 4fefd8f

File tree

46 files changed

+893
-431
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+893
-431
lines changed

api/specs/storage/openapi.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,40 @@ paths:
417417
"204":
418418
description: folder has been successfully deleted
419419

420+
/files/{file_id}:soft-copy:
421+
post:
422+
summary: Copy as soft link
423+
operationId: copy_as_soft_link
424+
parameters:
425+
- name: file_id
426+
in: path
427+
required: true
428+
schema:
429+
type: string
430+
- name: user_id
431+
in: query
432+
required: true
433+
schema:
434+
type: integer
435+
requestBody:
436+
content:
437+
application/json:
438+
schema:
439+
type: object
440+
required:
441+
- link_id
442+
properties:
443+
link_id:
444+
type: string
445+
responses:
446+
"200":
447+
description: "Returns link metadata"
448+
content:
449+
application/json:
450+
schema:
451+
$ref: "#/components/schemas/FileMetaDataEnveloped"
452+
default:
453+
$ref: "#/components/responses/DefaultErrorResponse"
420454
components:
421455
schemas:
422456
HealthCheckEnveloped:

packages/models-library/src/models_library/api_schemas_storage.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ class FileMetaData(BaseModel):
153153
entity_tag: Optional[str] = Field(
154154
description="Entity tag (or ETag), represents a specific version of the file",
155155
)
156+
is_soft_link: bool = Field(
157+
False,
158+
description="If true, this file is a soft link."
159+
"i.e. is another entry with the same object_name",
160+
)
156161

157162
parent_id: Optional[str]
158163

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""adds file_meta_data.is_soft_link column
2+
3+
Revision ID: be29dbed0cce
4+
Revises: e43bd59a8e17
5+
Create Date: 2021-04-15 08:10:50.878539+00:00
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = 'be29dbed0cce'
14+
down_revision = 'e43bd59a8e17'
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
20+
# ### commands auto generated by Alembic - please adjust! ###
21+
op.add_column('file_meta_data', sa.Column('is_soft_link', sa.Boolean(), server_default=sa.text('false'), nullable=False))
22+
# ### end Alembic commands ###
23+
24+
25+
def downgrade():
26+
# ### commands auto generated by Alembic - please adjust! ###
27+
op.drop_column('file_meta_data', 'is_soft_link')
28+
# ### end Alembic commands ###

packages/postgres-database/src/simcore_postgres_database/models/file_meta_data.py

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,37 @@
55
file_meta_data = sa.Table(
66
"file_meta_data",
77
metadata,
8-
sa.Column("file_uuid", sa.String, primary_key=True),
9-
sa.Column("location_id", sa.String),
10-
sa.Column("location", sa.String),
11-
sa.Column("bucket_name", sa.String),
12-
sa.Column("object_name", sa.String),
13-
sa.Column("project_id", sa.String),
14-
sa.Column("project_name", sa.String),
15-
sa.Column("node_id", sa.String),
16-
sa.Column("node_name", sa.String),
17-
sa.Column("file_name", sa.String),
18-
sa.Column("user_id", sa.String),
19-
sa.Column("user_name", sa.String),
20-
sa.Column("file_id", sa.String),
21-
sa.Column("raw_file_path", sa.String),
22-
sa.Column("display_file_path", sa.String),
23-
sa.Column("created_at", sa.String),
24-
sa.Column("last_modified", sa.String),
25-
sa.Column("file_size", sa.BigInteger),
26-
# Entity tag (or ETag), represents a specific version of the object.
27-
# SEE https://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html)
28-
sa.Column("entity_tag", sa.String, nullable=True),
8+
sa.Column("file_uuid", sa.String(), primary_key=True),
9+
sa.Column("location_id", sa.String()),
10+
sa.Column("location", sa.String()),
11+
sa.Column("bucket_name", sa.String()),
12+
sa.Column("object_name", sa.String()),
13+
sa.Column("project_id", sa.String()),
14+
sa.Column("project_name", sa.String()),
15+
sa.Column("node_id", sa.String()),
16+
sa.Column("node_name", sa.String()),
17+
sa.Column("file_name", sa.String()),
18+
sa.Column("user_id", sa.String()),
19+
sa.Column("user_name", sa.String()),
20+
sa.Column("file_id", sa.String()),
21+
sa.Column("raw_file_path", sa.String()),
22+
sa.Column("display_file_path", sa.String()),
23+
sa.Column("created_at", sa.String()),
24+
sa.Column("last_modified", sa.String()),
25+
sa.Column("file_size", sa.BigInteger()),
26+
sa.Column(
27+
"entity_tag",
28+
sa.String(),
29+
nullable=True,
30+
doc="Entity tag (or ETag), represents a specific version of the object"
31+
"SEE https://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html",
32+
),
33+
sa.Column(
34+
"is_soft_link",
35+
sa.Boolean(),
36+
nullable=False,
37+
server_default=sa.text("false"),
38+
doc="If true, this file is a soft link."
39+
"i.e. is another entry with the same object_name",
40+
),
2941
)

packages/pytest-simcore/src/pytest_simcore/minio_service.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ def wait_till_minio_responsive(minio_config: Dict[str, str]) -> bool:
7474
def bucket(minio_config: Dict[str, str], minio_service: S3Client) -> str:
7575
bucket_name = minio_config["bucket_name"]
7676
minio_service.create_bucket(bucket_name, delete_contents_if_exists=True)
77+
7778
yield bucket_name
7879

7980
minio_service.remove_bucket(bucket_name, delete_contents=True)

packages/pytest-simcore/src/pytest_simcore/simcore_services.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,7 @@ async def simcore_services(services_endpoint: Dict[str, URL], monkeypatch) -> No
6363
async def wait_till_service_responsive(endpoint: URL):
6464
async with aiohttp.ClientSession() as session:
6565
async with session.get(endpoint) as resp:
66+
# NOTE: Health-check endpoint require only a
67+
# status code 200 (see e.g. services/web/server/docker/healthcheck.py)
68+
# regardless of the payload content
6669
assert resp.status == 200
67-
data = await resp.json()
68-
# aiohttp based services are like this:
69-
assert "data" in data or ":-)" in data or ":-)" in data.get("msg")
70-
if "data" in data:
71-
assert "status" in data["data"]
72-
assert data["data"]["status"] == "SERVICE_RUNNING"

packages/pytest-simcore/src/pytest_simcore/simcore_storage_service.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,4 @@ async def wait_till_storage_responsive(storage_endpoint: URL):
5050
assert resp.status == 200
5151
data = await resp.json()
5252
assert "data" in data
53-
assert "status" in data["data"]
54-
assert data["data"]["status"] == "SERVICE_RUNNING"
53+
assert data["data"] is not None

packages/pytest-simcore/src/pytest_simcore/simcore_webserver_service.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,8 @@
77
import aiohttp
88
import pytest
99
import tenacity
10-
from yarl import URL
11-
1210
from servicelib.minio_utils import MinioRetryPolicyUponInitialization
11+
from yarl import URL
1312

1413
from .helpers.utils_docker import get_service_published_port
1514

@@ -36,8 +35,7 @@ async def webserver_service(webserver_endpoint: URL, docker_stack: Dict) -> URL:
3635
async def wait_till_webserver_responsive(webserver_endpoint: URL):
3736
async with aiohttp.ClientSession() as session:
3837
async with session.get(webserver_endpoint.with_path("/v0/")) as resp:
38+
# NOTE: Health-check endpoint require only a
39+
# status code 200 (see e.g. services/web/server/docker/healthcheck.py)
40+
# regardless of the payload content
3941
assert resp.status == 200
40-
data = await resp.json()
41-
assert "data" in data
42-
assert "status" in data["data"]
43-
assert data["data"]["status"] == "SERVICE_RUNNING"

packages/simcore-sdk/src/simcore_sdk/node_ports/filemanager.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -100,35 +100,40 @@ async def _get_location_id_from_location_name(store: str, api: UsersApi):
100100

101101
async def _get_link(store_id: int, file_id: str, apifct) -> URL:
102102
log.debug("Getting link from store id %s for %s", store_id, file_id)
103-
try:
104-
# When uploading and downloading files from the storage service
105-
# it is important to use a longer timeout, previously was 5 minutes
106-
# changing to 1 hour. this will allow for larger payloads to be stored/download
107-
resp = await apifct(
108-
location_id=store_id,
109-
user_id=config.USER_ID,
110-
file_id=file_id,
111-
_request_timeout=ServicesCommonSettings().storage_service_upload_download_timeout,
112-
)
103+
# When uploading and downloading files from the storage service
104+
# it is important to use a longer timeout, previously was 5 minutes
105+
# changing to 1 hour. this will allow for larger payloads to be stored/download
106+
resp = await apifct(
107+
location_id=store_id,
108+
user_id=config.USER_ID,
109+
file_id=file_id,
110+
_request_timeout=ServicesCommonSettings().storage_service_upload_download_timeout,
111+
)
113112

114-
if resp.error:
115-
raise exceptions.S3TransferError(
116-
"Error getting link: {}".format(resp.error.to_str())
117-
)
118-
if not resp.data.link:
119-
raise exceptions.S3InvalidPathError(file_id)
120-
log.debug("Got link %s", resp.data.link)
121-
return URL(resp.data.link)
122-
except ApiException as err:
123-
_handle_api_exception(store_id, err)
113+
if resp.error:
114+
raise exceptions.S3TransferError(
115+
"Error getting link: {}".format(resp.error.to_str())
116+
)
117+
if not resp.data.link:
118+
raise exceptions.S3InvalidPathError(file_id)
119+
log.debug("Got link %s", resp.data.link)
120+
return URL(resp.data.link)
124121

125122

126123
async def _get_download_link(store_id: int, file_id: str, api: UsersApi) -> URL:
127-
return await _get_link(store_id, file_id, api.download_file)
124+
try:
125+
return await _get_link(store_id, file_id, api.download_file)
126+
except ApiException as err:
127+
if err.status == 404:
128+
raise exceptions.InvalidDownloadLinkError(None) from err
129+
_handle_api_exception(store_id, err)
128130

129131

130132
async def _get_upload_link(store_id: int, file_id: str, api: UsersApi) -> URL:
131-
return await _get_link(store_id, file_id, api.upload_file)
133+
try:
134+
return await _get_link(store_id, file_id, api.upload_file)
135+
except ApiException as err:
136+
_handle_api_exception(store_id, err)
132137

133138

134139
async def _download_link_to_file(session: ClientSession, url: URL, file_path: Path):

packages/simcore-sdk/tests/integration/conftest.py

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
import np_helpers
1313
import pytest
14+
import requests
15+
import simcore_service_storage_sdk
1416
import sqlalchemy as sa
1517
from pytest_simcore.helpers.rawdata_fakers import random_project, random_user
1618
from simcore_postgres_database.storage_models import projects, users
@@ -88,12 +90,8 @@ async def filemanager_cfg(
8890

8991
@pytest.fixture
9092
def create_valid_file_uuid(project_id: str, node_uuid: str) -> Callable:
91-
def create(file_path: Path, project: str = None, node: str = None):
92-
if project is None:
93-
project = project_id
94-
if node is None:
95-
node = node_uuid
96-
return np_helpers.file_uuid(file_path, project, node)
93+
def create(file_path: Path):
94+
return np_helpers.file_uuid(file_path, project_id, node_uuid)
9795

9896
return create
9997

@@ -131,18 +129,39 @@ def create_node_link(key: str) -> Dict[str, str]:
131129

132130
@pytest.fixture()
133131
def store_link(
134-
minio_service, bucket, create_valid_file_uuid, s3_simcore_location
132+
bucket: str, # packages/pytest-simcore/src/pytest_simcore/minio_service.py
133+
create_valid_file_uuid: Callable,
134+
s3_simcore_location: str,
135+
user_id: int,
136+
project_id: str,
137+
node_uuid: str,
138+
storage_service: URL, # packages/pytest-simcore/src/pytest_simcore/simcore_storage_service.py
135139
) -> Callable:
136-
def create_store_link(
137-
file_path: Path, project_id: str = None, node_id: str = None
138-
) -> Dict[str, str]:
139-
# upload the file to S3
140-
assert Path(file_path).exists()
141-
file_id = create_valid_file_uuid(file_path, project_id, node_id)
142-
# using the s3 client the path must be adapted
143-
# TODO: use the storage sdk instead
144-
s3_object = Path(project_id, node_id, Path(file_path).name).as_posix()
145-
minio_service.upload_file(bucket, s3_object, str(file_path))
140+
async def create_store_link(file_path: Path) -> Dict[str, str]:
141+
file_path = Path(file_path)
142+
assert file_path.exists()
143+
144+
file_id = create_valid_file_uuid(file_path)
145+
146+
# Get upload presigned link via storage-sdk API (both pg and s3 get updated)
147+
user_api = simcore_service_storage_sdk.UsersApi()
148+
user_api.api_client.configuration.host = str(storage_service.with_path("/v0"))
149+
150+
r: simcore_service_storage_sdk.PresignedLinkEnveloped = (
151+
await user_api.upload_file(file_id, s3_simcore_location, user_id)
152+
)
153+
154+
# Upload using the link
155+
extra_hdr = {
156+
"Content-Length": f"{file_path.stat().st_size}",
157+
"Content-Type": "application/binary",
158+
}
159+
upload = requests.put(
160+
r.data.link, data=file_path.read_bytes(), headers=extra_hdr
161+
)
162+
assert upload.status_code == 200, upload.text
163+
164+
# FIXME: that at this point, S3 and pg have some data that is NOT cleaned up
146165
return {"store": s3_simcore_location, "path": file_id}
147166

148167
yield create_store_link
@@ -189,7 +208,7 @@ def special_2nodes_configuration(
189208
empty_configuration_file: Path,
190209
project_id: str,
191210
node_uuid: str,
192-
):
211+
) -> Callable:
193212
def create_config(
194213
prev_node_inputs: List[Tuple[str, str, Any]] = None,
195214
prev_node_outputs: List[Tuple[str, str, Any]] = None,
@@ -228,6 +247,7 @@ def create_config(
228247
return config_dict, project_id, node_uuid
229248

230249
yield create_config
250+
231251
# teardown
232252
postgres_session.query(ComputationalTask).delete()
233253
postgres_session.query(ComputationalPipeline).delete()

0 commit comments

Comments
 (0)