Merged
Changes from all commits (42 commits)
007cf44  initial test (sanderegg, Feb 26, 2025)
4d8ffeb  entrypoint (sanderegg, Feb 26, 2025)
3ef461e  improve test (sanderegg, Feb 27, 2025)
243dcbc  implement dsm (sanderegg, Feb 27, 2025)
dd59d9a  ongoing (sanderegg, Feb 27, 2025)
4be4d69  first version (sanderegg, Feb 27, 2025)
6be4596  total size in a node (sanderegg, Feb 27, 2025)
6e502ba  refactor test (sanderegg, Feb 27, 2025)
36b4d6b  refactor test (sanderegg, Feb 27, 2025)
d45d8f5  getting size of workspace (sanderegg, Feb 27, 2025)
a050413  working (sanderegg, Feb 27, 2025)
13ef115  working (sanderegg, Feb 27, 2025)
897bdf8  mypy (sanderegg, Feb 27, 2025)
46256c7  datcore? (sanderegg, Feb 27, 2025)
0547d86  ruff (sanderegg, Mar 3, 2025)
14d99ef  removed BF variables (sanderegg, Mar 3, 2025)
c12c771  check also datcore now (sanderegg, Mar 3, 2025)
130e9b4  ongoing (sanderegg, Mar 3, 2025)
2ba6bcb  test in handler paths are running again (sanderegg, Mar 3, 2025)
65f3bf0  refactor (sanderegg, Mar 3, 2025)
23af204  refactor (sanderegg, Mar 3, 2025)
e351cdf  additional testing (sanderegg, Mar 3, 2025)
68ae3f9  add test (sanderegg, Mar 3, 2025)
6d99847  set correct passthroughs (sanderegg, Mar 4, 2025)
a1b4268  fix test (sanderegg, Mar 4, 2025)
1c2a107  fix test (sanderegg, Mar 4, 2025)
444db0e  fixed tests also with datcore (sanderegg, Mar 4, 2025)
9a7b76a  ongoing (sanderegg, Mar 4, 2025)
64fe253  add disk monitoring (sanderegg, Mar 4, 2025)
8c9069a  handle file not found in datcore (sanderegg, Mar 4, 2025)
640477b  fixed return values (sanderegg, Mar 4, 2025)
e563e7d  fixed tests (sanderegg, Mar 4, 2025)
e4e2638  test passing (sanderegg, Mar 4, 2025)
22d34ab  mypy (sanderegg, Mar 6, 2025)
81618e8  parametrize (sanderegg, Mar 6, 2025)
350e832  pylint (sanderegg, Mar 6, 2025)
70d8d76  compute total size in datcore as well (sanderegg, Mar 6, 2025)
6e320d2  added get_dataset entrypoint and test (sanderegg, Mar 6, 2025)
e44ac5d  updated openapi (sanderegg, Mar 6, 2025)
8195ae8  sonar (sanderegg, Mar 6, 2025)
3a84797  @pcrespov review: unreachable code (sanderegg, Mar 6, 2025)
fce2443  @pcrespov review: renaming (sanderegg, Mar 6, 2025)
2 changes: 0 additions & 2 deletions .env-devel
@@ -250,8 +250,6 @@ SMTP_PROTOCOL=UNENCRYPTED
 SMTP_USERNAME=it_doesnt_matter
 
 # STORAGE ----
-BF_API_KEY=none
-BF_API_SECRET=none
 STORAGE_ENDPOINT=storage:8080
 STORAGE_HOST=storage
 STORAGE_LOGLEVEL=INFO
@@ -1,13 +1,17 @@
 from datetime import datetime
 from enum import Enum, unique
 from pathlib import Path
+from typing import Annotated
 
-from pydantic import BaseModel, ByteSize
+from pydantic import BaseModel, ByteSize, Field
 
 
 class DatasetMetaData(BaseModel):
     id: str
     display_name: str
+    size: Annotated[
+        ByteSize | None, Field(description="Size of the dataset in bytes if available")
+    ]
 
 
 @unique
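The new `size` field uses pydantic's `Annotated`/`Field` pattern: it is nullable but has no default, so callers must state explicitly when the size is unknown. A minimal sketch of the resulting behaviour, assuming pydantic v2 (all values made up):

```python
# Minimal sketch; the model mirrors the diff above.
from typing import Annotated

from pydantic import BaseModel, ByteSize, Field


class DatasetMetaData(BaseModel):
    id: str
    display_name: str
    size: Annotated[
        ByteSize | None, Field(description="Size of the dataset in bytes if available")
    ]


# ByteSize accepts plain ints as well as human-readable strings
ds = DatasetMetaData(id="N:dataset:fake", display_name="demo", size="15MiB")
assert ds.size == 15_728_640

# size is nullable but required: an unknown size must be passed as None
unknown = DatasetMetaData(id="N:dataset:fake2", display_name="demo2", size=None)
assert unknown.size is None
```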
@@ -462,3 +462,31 @@ def _update_json_schema_extra(schema: JsonDict) -> None:
     model_config = ConfigDict(
         extra="forbid", json_schema_extra=_update_json_schema_extra
     )
+
+
+class PathTotalSizeCreate(BaseModel):
+    path: Path
+    size: ByteSize
+
+    @staticmethod
+    def _update_json_schema_extra(schema: JsonDict) -> None:
+        schema.update(
+            {
+                "examples": [
+                    # a folder
+                    {
+                        "path": "f8da77a9-24b9-4eab-aee7-1f0608da1e3e",
+                        "size": 15728640,
+                    },
+                    # 1 file
+                    {
+                        "path": f"f8da77a9-24b9-4eab-aee7-1f0608da1e3e/2f94f80f-633e-4dfa-a983-226b7babe3d7/outputs/output5/{FileMetaDataGet.model_json_schema()['examples'][0]['file_name']}",
+                        "size": 1024,
+                    },
+                ]
+            }
+        )
+
+    model_config = ConfigDict(
+        extra="forbid", json_schema_extra=_update_json_schema_extra
+    )
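A quick sketch of how this schema behaves at runtime, with hypothetical values and only the two fields from the diff (the json-schema examples are omitted):

```python
# Hedged sketch, assuming pydantic v2.
from pathlib import Path

from pydantic import BaseModel, ByteSize, ConfigDict, ValidationError


class PathTotalSizeCreate(BaseModel):
    path: Path
    size: ByteSize

    model_config = ConfigDict(extra="forbid")


entry = PathTotalSizeCreate(
    path="f8da77a9-24b9-4eab-aee7-1f0608da1e3e", size=15728640
)
# Path and ByteSize both serialize to plain JSON types
assert entry.model_dump(mode="json") == {
    "path": "f8da77a9-24b9-4eab-aee7-1f0608da1e3e",
    "size": 15728640,
}

# extra="forbid" makes typos in payload keys fail loudly
try:
    PathTotalSizeCreate(path="x", size=1, sizee=2)
except ValidationError:
    pass
```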
@@ -1,5 +1,4 @@
 import logging
-import os
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, TypedDict
@@ -14,14 +13,6 @@
 log = logging.getLogger(__name__)
 
 
-def has_datcore_tokens() -> bool:
-    api_key = os.environ.get("BF_API_KEY")
-    api_secret = os.environ.get("BF_API_SECRET")
-    if not api_key or not api_secret:
-        return False
-    return not (api_key == "none" or api_secret == "none")  # noqa: S105
-
-
 async def get_updated_project(
     sqlalchemy_async_engine: AsyncEngine, project_id: str
 ) -> dict[str, Any]:
@@ -1,15 +1,24 @@
 # pylint:disable=unused-variable
 # pylint:disable=unused-argument
 # pylint:disable=redefined-outer-name
 
+import re
+from collections.abc import Iterator
+
+import httpx
 import pytest
 import respx
+from faker import Faker
+from fastapi_pagination import Page, Params
+from pytest_simcore.helpers.host import get_localhost_ip
 from servicelib.aiohttp import status
 from simcore_service_storage.modules.datcore_adapter.datcore_adapter_settings import (
     DatcoreAdapterSettings,
 )
 
 
 @pytest.fixture
-def datcore_adapter_service_mock() -> respx.MockRouter:
+def datcore_adapter_service_mock(faker: Faker) -> Iterator[respx.MockRouter]:
     dat_core_settings = DatcoreAdapterSettings.create_from_envs()
     datcore_adapter_base_url = dat_core_settings.endpoint
     # mock base endpoint
@@ -18,15 +27,40 @@ def datcore_adapter_service_mock() -> respx.MockRouter:
         assert_all_called=False,
         assert_all_mocked=True,
     ) as respx_mocker:
+        # NOTE: passthrough the locahost and the local ip
+        respx_mocker.route(host="127.0.0.1").pass_through()
+        respx_mocker.route(host=get_localhost_ip()).pass_through()
+
+        respx_mocker.get("/user/profile", name="get_user_profile").respond(
+            status.HTTP_200_OK, json=faker.pydict(allowed_types=(str,))
+        )
         respx_mocker.get(
-            datcore_adapter_base_url,
-            name="healthcheck",
-        ).respond(status.HTTP_200_OK)
-        list_datasets_re = re.compile(rf"^{datcore_adapter_base_url}/datasets")
+            re.compile(r"/datasets/(?P<dataset_id>[^/]+)/files_legacy")
+        ).respond(status.HTTP_200_OK, json=[])
+        list_datasets_re = re.compile(r"/datasets")
         respx_mocker.get(list_datasets_re, name="list_datasets").respond(
-            status.HTTP_200_OK
+            status.HTTP_200_OK,
+            json=Page.create(items=[], params=Params(size=10), total=0).model_dump(
+                mode="json"
+            ),
         )
-        respx_mocker.get(datcore_adapter_base_url, name="base_endpoint").respond(
-            status.HTTP_200_OK, json={}
-        )
-        return respx_mocker
+
+        def _create_download_link(request, file_id):
+            return httpx.Response(
+                status.HTTP_404_NOT_FOUND,
+                json={"error": f"{file_id} not found!"},
+            )
+
+        respx_mocker.get(
+            re.compile(r"/files/(?P<file_id>[^/]+)"), name="get_file_dowload_link"
+        ).mock(side_effect=_create_download_link)
+
+        respx_mocker.get(
+            "/",
+            name="healthcheck",
+        ).respond(status.HTTP_200_OK, json={"message": "ok"})
+        respx_mocker.get("", name="base_endpoint").respond(
+            status.HTTP_200_OK, json={"message": "root entrypoint"}
+        )
+
+        yield respx_mocker
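For context, a hedged sketch of how a storage test might consume this fixture; the client base URL and the test body are assumptions, not part of the PR. The named routes let a test assert which mocks were hit:

```python
# Hypothetical consumer of the fixture above.
import httpx
import respx


async def test_list_datasets_is_mocked(
    datcore_adapter_service_mock: respx.MockRouter,
):
    # base_url is an assumption; the fixture intercepts calls to the adapter host
    async with httpx.AsyncClient(base_url="http://datcore-adapter:8000/v0") as client:
        response = await client.get("/datasets")

    assert response.status_code == 200
    # the mock returns an empty fastapi-pagination page
    assert response.json()["items"] == []
    assert datcore_adapter_service_mock["list_datasets"].called
```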
@@ -56,6 +56,31 @@ async def list_datasets(
     return create_page(datasets, total=total, params=params)  # type: ignore[return-value]
 
 
+@router.get(
+    "/datasets/{dataset_id}",
+    status_code=status.HTTP_200_OK,
+    response_model=DatasetMetaData,
+)
+@cancel_on_disconnect
+async def get_dataset(
+    request: Request,
+    x_datcore_api_key: Annotated[str, Header(..., description="Datcore API Key")],
+    x_datcore_api_secret: Annotated[str, Header(..., description="Datcore API Secret")],
+    pennsieve_client: Annotated[PennsieveApiClient, Depends(get_pennsieve_api_client)],
+    params: Annotated[Params, Depends()],
+    dataset_id: str,
+) -> DatasetMetaData:
+    assert request  # nosec
+    raw_params: RawParams = resolve_params(params).to_raw_params()
+    assert raw_params.limit is not None  # nosec
+    assert raw_params.offset is not None  # nosec
+    return await pennsieve_client.get_dataset(
+        api_key=x_datcore_api_key,
+        api_secret=x_datcore_api_secret,
+        dataset_id=dataset_id,
+    )
+
+
 @router.get(
     "/datasets/{dataset_id}/files",
     summary="list top level files/folders in a dataset",
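A hedged sketch of calling the new entrypoint; host, port, dataset id, and credential values are placeholders:

```python
# Hypothetical client call against the new /v0/datasets/{dataset_id} route.
import httpx

response = httpx.get(
    "http://datcore-adapter:8000/v0/datasets/N:dataset:0000",
    headers={
        "x-datcore-api-key": "my-api-key",  # placeholder credentials
        "x-datcore-api-secret": "my-api-secret",
    },
)
response.raise_for_status()
print(response.json())  # {"id": ..., "display_name": ..., "size": ...}
```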
@@ -14,6 +14,7 @@
     DataType,
     FileMetaData,
 )
+from pydantic import ByteSize
 from servicelib.logging_utils import log_context
 from servicelib.utils import logged_gather
 from starlette import status
@@ -81,9 +82,9 @@ class PennsieveAuthorizationHeaders(TypedDict):
     Authorization: str
 
 
-_TTL_CACHE_AUTHORIZATION_HEADERS_SECONDS: Final[
-    int
-] = 3530  # NOTE: observed while developing this code, pennsieve authorizes 3600 seconds, so we cache a bit less
+_TTL_CACHE_AUTHORIZATION_HEADERS_SECONDS: Final[int] = (
+    3530  # NOTE: observed while developing this code, pennsieve authorizes 3600 seconds, so we cache a bit less
+)
 
 ExpirationTimeSecs = int
 
@@ -346,12 +347,25 @@ async def list_datasets(
                 DatasetMetaData(
                     id=d["content"]["id"],
                     display_name=d["content"]["name"],
+                    size=ByteSize(d["storage"]) if d["storage"] > 0 else None,
                 )
                 for d in dataset_page["datasets"]
             ],
             dataset_page["totalCount"],
         )
 
+    async def get_dataset(
+        self, api_key: str, api_secret: str, dataset_id: str
+    ) -> DatasetMetaData:
+        dataset_pck = await self._get_dataset(api_key, api_secret, dataset_id)
+        return DatasetMetaData(
+            id=dataset_pck["content"]["id"],
+            display_name=dataset_pck["content"]["name"],
+            size=(
+                ByteSize(dataset_pck["storage"]) if dataset_pck["storage"] > 0 else None
+            ),
+        )
+
     async def list_packages_in_dataset(
         self,
         api_key: str,
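The size mapping deserves a note: Pennsieve reports `storage` as an integer byte count, and this PR maps `0` to `None` ("unknown") rather than to a zero size. A tiny sketch of that rule in isolation (payload values are made up):

```python
# Isolated sketch of the storage -> size rule used in both methods above.
from pydantic import ByteSize


def _to_size(storage: int) -> ByteSize | None:
    # 0 means "no reported size", which the API exposes as None
    return ByteSize(storage) if storage > 0 else None


assert _to_size(15_728_640) == ByteSize(15_728_640)
assert _to_size(0) is None
print(ByteSize(15_728_640).human_readable())  # e.g. "15.0MiB"
```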
14 changes: 12 additions & 2 deletions services/datcore-adapter/tests/unit/conftest.py
@@ -231,7 +231,13 @@ def pennsieve_random_fake_datasets(
 ) -> dict[str, Any]:
     return {
         "datasets": [
-            {"content": {"id": create_pennsieve_fake_dataset_id(), "name": fake.text()}}
+            {
+                "content": {
+                    "id": create_pennsieve_fake_dataset_id(),
+                    "name": fake.text(),
+                },
+                "storage": fake.pyint(),
+            }
             for _ in range(10)
         ],
         "totalCount": 20,
@@ -308,7 +314,11 @@ async def pennsieve_subsystem_mock(
     ).respond(
         status.HTTP_200_OK,
         json={
-            "content": {"name": "Some dataset name that is awesome"},
+            "content": {
+                "name": "Some dataset name that is awesome",
+                "id": pennsieve_dataset_id,
+            },
+            "storage": fake.pyint(),
             "children": pennsieve_mock_dataset_packages["packages"],
         },
     )
17 changes: 17 additions & 0 deletions services/datcore-adapter/tests/unit/test_route_datasets.py
@@ -14,6 +14,23 @@
 from starlette import status
 
 
+async def test_get_dataset_entrypoint(
+    async_client: httpx.AsyncClient,
+    pennsieve_dataset_id: str,
+    pennsieve_subsystem_mock: respx.MockRouter | None,
+    pennsieve_api_headers: dict[str, str],
+):
+    response = await async_client.get(
+        f"v0/datasets/{pennsieve_dataset_id}",
+        headers=pennsieve_api_headers,
+    )
+
+    assert response.status_code == status.HTTP_200_OK
+    data = response.json()
+    assert data
+    TypeAdapter(DatasetMetaData).validate_python(data)
+
+
 async def test_list_datasets_entrypoint(
     async_client: httpx.AsyncClient,
     pennsieve_subsystem_mock: respx.MockRouter | None,
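The `TypeAdapter` line is the pydantic v2 way to validate an arbitrary payload against a model outside a model constructor; a minimal illustration with a made-up payload (the model here stands in for the imported one, whose import is not shown in this hunk):

```python
# Minimal illustration of TypeAdapter validation.
from pydantic import BaseModel, ByteSize, TypeAdapter


class DatasetMetaData(BaseModel):
    id: str
    display_name: str
    size: ByteSize | None


payload = {"id": "N:dataset:0000", "display_name": "demo", "size": 1024}
dataset = TypeAdapter(DatasetMetaData).validate_python(payload)
assert dataset.size == 1024
```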
2 changes: 0 additions & 2 deletions services/docker-compose.yml
@@ -1153,8 +1153,6 @@ services:
     init: true
     hostname: "sto-{{.Node.Hostname}}-{{.Task.Slot}}"
     environment:
-      BF_API_KEY: ${BF_API_KEY}
-      BF_API_SECRET: ${BF_API_SECRET}
       DATCORE_ADAPTER_HOST: ${DATCORE_ADAPTER_HOST:-datcore-adapter}
       LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED}
       LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING}