
Commit 2415e78

✨Storage: provide entrypoint to compute path size (⚠️ devops) (ITISFoundation#7286)

1 parent 5075fe0

File tree: 33 files changed, +1095 −140 lines

.env-devel

Lines changed: 0 additions & 2 deletions
@@ -250,8 +250,6 @@ SMTP_PROTOCOL=UNENCRYPTED
 SMTP_USERNAME=it_doesnt_matter

 # STORAGE ----
-BF_API_KEY=none
-BF_API_SECRET=none
 STORAGE_ENDPOINT=storage:8080
 STORAGE_HOST=storage
 STORAGE_LOGLEVEL=INFO

packages/models-library/src/models_library/api_schemas_datcore_adapter/datasets.py

Lines changed: 5 additions & 1 deletion
@@ -1,13 +1,17 @@
 from datetime import datetime
 from enum import Enum, unique
 from pathlib import Path
+from typing import Annotated

-from pydantic import BaseModel, ByteSize
+from pydantic import BaseModel, ByteSize, Field


 class DatasetMetaData(BaseModel):
     id: str
     display_name: str
+    size: Annotated[
+        ByteSize | None, Field(description="Size of the dataset in bytes if available")
+    ]


 @unique
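
Note: a minimal usage sketch of the extended model (assumes pydantic v2, as used in this repo; the id and size values are illustrative):

    from models_library.api_schemas_datcore_adapter.datasets import DatasetMetaData

    # "size" has no default, so callers must pass it explicitly:
    # either a byte count or None when no storage figure is available.
    ds = DatasetMetaData(id="N:dataset:123", display_name="demo", size=15728640)
    assert ds.size is not None
    print(ds.size.human_readable())  # pydantic ByteSize, e.g. "15.0MiB"

    empty = DatasetMetaData(id="N:dataset:456", display_name="empty", size=None)
    assert empty.size is None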

packages/models-library/src/models_library/api_schemas_storage/storage_schemas.py

Lines changed: 28 additions & 0 deletions
@@ -462,3 +462,31 @@ def _update_json_schema_extra(schema: JsonDict) -> None:
     model_config = ConfigDict(
         extra="forbid", json_schema_extra=_update_json_schema_extra
     )
+
+
+class PathTotalSizeCreate(BaseModel):
+    path: Path
+    size: ByteSize
+
+    @staticmethod
+    def _update_json_schema_extra(schema: JsonDict) -> None:
+        schema.update(
+            {
+                "examples": [
+                    # a folder
+                    {
+                        "path": "f8da77a9-24b9-4eab-aee7-1f0608da1e3e",
+                        "size": 15728640,
+                    },
+                    # 1 file
+                    {
+                        "path": f"f8da77a9-24b9-4eab-aee7-1f0608da1e3e/2f94f80f-633e-4dfa-a983-226b7babe3d7/outputs/output5/{FileMetaDataGet.model_json_schema()['examples'][0]['file_name']}",
+                        "size": 1024,
+                    },
+                ]
+            }
+        )
+
+    model_config = ConfigDict(
+        extra="forbid", json_schema_extra=_update_json_schema_extra
+    )
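
Note: a short sketch of how the new PathTotalSizeCreate schema behaves (assumes the import path matches the file above; values are illustrative):

    from models_library.api_schemas_storage.storage_schemas import PathTotalSizeCreate

    payload = PathTotalSizeCreate.model_validate(
        {"path": "f8da77a9-24b9-4eab-aee7-1f0608da1e3e", "size": 15728640}
    )
    print(payload.size.human_readable())  # ByteSize renders e.g. "15.0MiB"

    # extra="forbid" means unknown keys are rejected with a ValidationError:
    # PathTotalSizeCreate.model_validate({"path": "x", "size": 1, "foo": 2})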

packages/pytest-simcore/src/pytest_simcore/helpers/storage_utils.py

Lines changed: 0 additions & 9 deletions
@@ -1,5 +1,4 @@
 import logging
-import os
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, TypedDict
@@ -14,14 +13,6 @@
 log = logging.getLogger(__name__)


-def has_datcore_tokens() -> bool:
-    api_key = os.environ.get("BF_API_KEY")
-    api_secret = os.environ.get("BF_API_SECRET")
-    if not api_key or not api_secret:
-        return False
-    return not (api_key == "none" or api_secret == "none")  # noqa: S105
-
-
 async def get_updated_project(
     sqlalchemy_async_engine: AsyncEngine, project_id: str
 ) -> dict[str, Any]:
Lines changed: 43 additions & 9 deletions
@@ -1,15 +1,24 @@
+# pylint:disable=unused-variable
+# pylint:disable=unused-argument
+# pylint:disable=redefined-outer-name
+
 import re
+from collections.abc import Iterator

+import httpx
 import pytest
 import respx
+from faker import Faker
+from fastapi_pagination import Page, Params
+from pytest_simcore.helpers.host import get_localhost_ip
 from servicelib.aiohttp import status
 from simcore_service_storage.modules.datcore_adapter.datcore_adapter_settings import (
     DatcoreAdapterSettings,
 )


 @pytest.fixture
-def datcore_adapter_service_mock() -> respx.MockRouter:
+def datcore_adapter_service_mock(faker: Faker) -> Iterator[respx.MockRouter]:
     dat_core_settings = DatcoreAdapterSettings.create_from_envs()
     datcore_adapter_base_url = dat_core_settings.endpoint
     # mock base endpoint
@@ -18,15 +27,40 @@ def datcore_adapter_service_mock() -> respx.MockRouter:
         assert_all_called=False,
         assert_all_mocked=True,
     ) as respx_mocker:
+        # NOTE: passthrough the localhost and the local ip
+        respx_mocker.route(host="127.0.0.1").pass_through()
+        respx_mocker.route(host=get_localhost_ip()).pass_through()
+
+        respx_mocker.get("/user/profile", name="get_user_profile").respond(
+            status.HTTP_200_OK, json=faker.pydict(allowed_types=(str,))
+        )
         respx_mocker.get(
-            datcore_adapter_base_url,
-            name="healthcheck",
-        ).respond(status.HTTP_200_OK)
-        list_datasets_re = re.compile(rf"^{datcore_adapter_base_url}/datasets")
+            re.compile(r"/datasets/(?P<dataset_id>[^/]+)/files_legacy")
+        ).respond(status.HTTP_200_OK, json=[])
+        list_datasets_re = re.compile(r"/datasets")
         respx_mocker.get(list_datasets_re, name="list_datasets").respond(
-            status.HTTP_200_OK
+            status.HTTP_200_OK,
+            json=Page.create(items=[], params=Params(size=10), total=0).model_dump(
+                mode="json"
+            ),
         )
-        respx_mocker.get(datcore_adapter_base_url, name="base_endpoint").respond(
-            status.HTTP_200_OK, json={}
+
+        def _create_download_link(request, file_id):
+            return httpx.Response(
+                status.HTTP_404_NOT_FOUND,
+                json={"error": f"{file_id} not found!"},
+            )
+
+        respx_mocker.get(
+            re.compile(r"/files/(?P<file_id>[^/]+)"), name="get_file_dowload_link"
+        ).mock(side_effect=_create_download_link)
+
+        respx_mocker.get(
+            "/",
+            name="healthcheck",
+        ).respond(status.HTTP_200_OK, json={"message": "ok"})
+        respx_mocker.get("", name="base_endpoint").respond(
+            status.HTTP_200_OK, json={"message": "root entrypoint"}
         )
-        return respx_mocker
+
+        yield respx_mocker
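
Note: a hypothetical test body showing how the reworked fixture behaves (the base URL is a placeholder; the route name comes from the fixture above). Requests to the datcore-adapter host are mocked, while traffic to 127.0.0.1 and the local IP passes through:

    import httpx
    from servicelib.aiohttp import status

    async def test_sketch(datcore_adapter_service_mock) -> None:
        async with httpx.AsyncClient(base_url="http://datcore-adapter:8000") as client:
            response = await client.get("/user/profile")
        assert response.status_code == status.HTTP_200_OK
        # named routes can be inspected after the call
        assert datcore_adapter_service_mock["get_user_profile"].called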

services/datcore-adapter/src/simcore_service_datcore_adapter/api/rest/datasets.py

Lines changed: 25 additions & 0 deletions
@@ -56,6 +56,31 @@ async def list_datasets(
     return create_page(datasets, total=total, params=params)  # type: ignore[return-value]


+@router.get(
+    "/datasets/{dataset_id}",
+    status_code=status.HTTP_200_OK,
+    response_model=DatasetMetaData,
+)
+@cancel_on_disconnect
+async def get_dataset(
+    request: Request,
+    x_datcore_api_key: Annotated[str, Header(..., description="Datcore API Key")],
+    x_datcore_api_secret: Annotated[str, Header(..., description="Datcore API Secret")],
+    pennsieve_client: Annotated[PennsieveApiClient, Depends(get_pennsieve_api_client)],
+    params: Annotated[Params, Depends()],
+    dataset_id: str,
+) -> DatasetMetaData:
+    assert request  # nosec
+    raw_params: RawParams = resolve_params(params).to_raw_params()
+    assert raw_params.limit is not None  # nosec
+    assert raw_params.offset is not None  # nosec
+    return await pennsieve_client.get_dataset(
+        api_key=x_datcore_api_key,
+        api_secret=x_datcore_api_secret,
+        dataset_id=dataset_id,
+    )
+
+
 @router.get(
     "/datasets/{dataset_id}/files",
     summary="list top level files/folders in a dataset",
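
Note: a hedged client-side sketch for the new entrypoint (base URL and credentials are placeholders): the Datcore key/secret travel in custom headers, exactly as in the existing dataset-listing route.

    import httpx

    async def fetch_dataset(dataset_id: str) -> dict:
        async with httpx.AsyncClient(base_url="http://datcore-adapter:8000/v0") as client:
            response = await client.get(
                f"/datasets/{dataset_id}",
                headers={
                    "x-datcore-api-key": "<api-key>",
                    "x-datcore-api-secret": "<api-secret>",
                },
            )
        response.raise_for_status()
        return response.json()  # {"id": ..., "display_name": ..., "size": ...}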

services/datcore-adapter/src/simcore_service_datcore_adapter/modules/pennsieve.py

Lines changed: 17 additions & 3 deletions
@@ -14,6 +14,7 @@
     DataType,
     FileMetaData,
 )
+from pydantic import ByteSize
 from servicelib.logging_utils import log_context
 from servicelib.utils import logged_gather
 from starlette import status
@@ -81,9 +82,9 @@ class PennsieveAuthorizationHeaders(TypedDict):
     Authorization: str


-_TTL_CACHE_AUTHORIZATION_HEADERS_SECONDS: Final[
-    int
-] = 3530  # NOTE: observed while developing this code, pennsieve authorizes 3600 seconds, so we cache a bit less
+_TTL_CACHE_AUTHORIZATION_HEADERS_SECONDS: Final[int] = (
+    3530  # NOTE: observed while developing this code, pennsieve authorizes 3600 seconds, so we cache a bit less
+)

 ExpirationTimeSecs = int

@@ -346,12 +347,25 @@ async def list_datasets(
             DatasetMetaData(
                 id=d["content"]["id"],
                 display_name=d["content"]["name"],
+                size=ByteSize(d["storage"]) if d["storage"] > 0 else None,
             )
             for d in dataset_page["datasets"]
         ],
         dataset_page["totalCount"],
     )

+    async def get_dataset(
+        self, api_key: str, api_secret: str, dataset_id: str
+    ) -> DatasetMetaData:
+        dataset_pck = await self._get_dataset(api_key, api_secret, dataset_id)
+        return DatasetMetaData(
+            id=dataset_pck["content"]["id"],
+            display_name=dataset_pck["content"]["name"],
+            size=(
+                ByteSize(dataset_pck["storage"]) if dataset_pck["storage"] > 0 else None
+            ),
+        )
+
     async def list_packages_in_dataset(
         self,
         api_key: str,
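
Note: the "> 0" guard maps a Pennsieve storage figure of 0 to size=None, i.e. "size not available", matching the field description in DatasetMetaData. A standalone sketch of the same conversion (illustrative values):

    from pydantic import ByteSize

    def to_optional_size(storage: int) -> ByteSize | None:
        return ByteSize(storage) if storage > 0 else None

    assert to_optional_size(0) is None
    assert to_optional_size(15728640) == 15728640  # ByteSize subclasses int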

services/datcore-adapter/tests/unit/conftest.py

Lines changed: 12 additions & 2 deletions
@@ -231,7 +231,13 @@ def pennsieve_random_fake_datasets(
 ) -> dict[str, Any]:
     return {
         "datasets": [
-            {"content": {"id": create_pennsieve_fake_dataset_id(), "name": fake.text()}}
+            {
+                "content": {
+                    "id": create_pennsieve_fake_dataset_id(),
+                    "name": fake.text(),
+                },
+                "storage": fake.pyint(),
+            }
             for _ in range(10)
         ],
         "totalCount": 20,
@@ -308,7 +314,11 @@ async def pennsieve_subsystem_mock(
     ).respond(
         status.HTTP_200_OK,
         json={
-            "content": {"name": "Some dataset name that is awesome"},
+            "content": {
+                "name": "Some dataset name that is awesome",
+                "id": pennsieve_dataset_id,
+            },
+            "storage": fake.pyint(),
             "children": pennsieve_mock_dataset_packages["packages"],
         },
     )

services/datcore-adapter/tests/unit/test_route_datasets.py

Lines changed: 17 additions & 0 deletions
@@ -14,6 +14,23 @@
 from starlette import status


+async def test_get_dataset_entrypoint(
+    async_client: httpx.AsyncClient,
+    pennsieve_dataset_id: str,
+    pennsieve_subsystem_mock: respx.MockRouter | None,
+    pennsieve_api_headers: dict[str, str],
+):
+    response = await async_client.get(
+        f"v0/datasets/{pennsieve_dataset_id}",
+        headers=pennsieve_api_headers,
+    )
+
+    assert response.status_code == status.HTTP_200_OK
+    data = response.json()
+    assert data
+    TypeAdapter(DatasetMetaData).validate_python(data)
+
+
 async def test_list_datasets_entrypoint(
     async_client: httpx.AsyncClient,
     pennsieve_subsystem_mock: respx.MockRouter | None,
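
Note: the new test validates the raw JSON against the response model via pydantic's TypeAdapter; an equivalent standalone check looks like this (payload is illustrative):

    from models_library.api_schemas_datcore_adapter.datasets import DatasetMetaData
    from pydantic import TypeAdapter

    payload = {"id": "N:dataset:123", "display_name": "demo", "size": 1024}
    meta = TypeAdapter(DatasetMetaData).validate_python(payload)
    assert meta.size == 1024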

services/docker-compose.yml

Lines changed: 0 additions & 2 deletions
@@ -1153,8 +1153,6 @@ services:
     init: true
     hostname: "sto-{{.Node.Hostname}}-{{.Task.Slot}}"
     environment:
-      BF_API_KEY: ${BF_API_KEY}
-      BF_API_SECRET: ${BF_API_SECRET}
       DATCORE_ADAPTER_HOST: ${DATCORE_ADAPTER_HOST:-datcore-adapter}
       LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED}
       LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING}
