
Commit 8d7014d

compute total size in datcore as well
1 parent de1e8e5 commit 8d7014d

5 files changed: +118 −7

packages/models-library/src/models_library/api_schemas_datcore_adapter/datasets.py

Lines changed: 5 additions & 1 deletion
@@ -1,13 +1,17 @@
 from datetime import datetime
 from enum import Enum, unique
 from pathlib import Path
+from typing import Annotated

-from pydantic import BaseModel, ByteSize
+from pydantic import BaseModel, ByteSize, Field


 class DatasetMetaData(BaseModel):
     id: str
     display_name: str
+    size: Annotated[
+        ByteSize | None, Field(description="Size of the dataset in bytes if available")
+    ]


 @unique
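
The new `size` field is a required `ByteSize | None`, so every constructor call must now pass it explicitly. A minimal sketch (not part of the commit) of how pydantic handles the field; the dataset ids and values are made up:

from typing import Annotated

from pydantic import BaseModel, ByteSize, Field


class DatasetMetaData(BaseModel):
    id: str
    display_name: str
    size: Annotated[
        ByteSize | None, Field(description="Size of the dataset in bytes if available")
    ]


# ByteSize coerces ints and human-readable strings to a byte count
meta = DatasetMetaData(id="N:dataset:0000", display_name="demo", size="1.5GiB")
assert meta.size == 1_610_612_736
print(meta.size.human_readable())  # -> "1.5GiB"

# size has no default, so "unknown" must be passed explicitly as None
unknown = DatasetMetaData(id="N:dataset:0001", display_name="unsized", size=None)
assert unknown.size is None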

services/datcore-adapter/src/simcore_service_datcore_adapter/api/rest/datasets.py

Lines changed: 25 additions & 0 deletions
@@ -56,6 +56,31 @@ async def list_datasets(
     return create_page(datasets, total=total, params=params)  # type: ignore[return-value]


+@router.get(
+    "/datasets/{dataset_id}",
+    status_code=status.HTTP_200_OK,
+    response_model=DatasetMetaData,
+)
+@cancel_on_disconnect
+async def get_dataset(
+    request: Request,
+    x_datcore_api_key: Annotated[str, Header(..., description="Datcore API Key")],
+    x_datcore_api_secret: Annotated[str, Header(..., description="Datcore API Secret")],
+    pennsieve_client: Annotated[PennsieveApiClient, Depends(get_pennsieve_api_client)],
+    params: Annotated[Params, Depends()],
+    dataset_id: str,
+) -> DatasetMetaData:
+    assert request  # nosec
+    raw_params: RawParams = resolve_params(params).to_raw_params()
+    assert raw_params.limit is not None  # nosec
+    assert raw_params.offset is not None  # nosec
+    return await pennsieve_client.get_dataset(
+        api_key=x_datcore_api_key,
+        api_secret=x_datcore_api_secret,
+        dataset_id=dataset_id,
+    )
+
+
 @router.get(
     "/datasets/{dataset_id}/files",
     summary="list top level files/folders in a dataset",

services/datcore-adapter/src/simcore_service_datcore_adapter/modules/pennsieve.py

Lines changed: 17 additions & 3 deletions
@@ -14,6 +14,7 @@
     DataType,
     FileMetaData,
 )
+from pydantic import ByteSize
 from servicelib.logging_utils import log_context
 from servicelib.utils import logged_gather
 from starlette import status

@@ -81,9 +82,9 @@ class PennsieveAuthorizationHeaders(TypedDict):
     Authorization: str


-_TTL_CACHE_AUTHORIZATION_HEADERS_SECONDS: Final[
-    int
-] = 3530  # NOTE: observed while developing this code, pennsieve authorizes 3600 seconds, so we cache a bit less
+_TTL_CACHE_AUTHORIZATION_HEADERS_SECONDS: Final[int] = (
+    3530  # NOTE: observed while developing this code, pennsieve authorizes 3600 seconds, so we cache a bit less
+)

 ExpirationTimeSecs = int

@@ -346,12 +347,25 @@ async def list_datasets(
                 DatasetMetaData(
                     id=d["content"]["id"],
                     display_name=d["content"]["name"],
+                    size=ByteSize(d["storage"]) if d["storage"] > 0 else None,
                 )
                 for d in dataset_page["datasets"]
             ],
             dataset_page["totalCount"],
         )

+    async def get_dataset(
+        self, api_key: str, api_secret: str, dataset_id: str
+    ) -> DatasetMetaData:
+        dataset_pck = await self._get_dataset(api_key, api_secret, dataset_id)
+        return DatasetMetaData(
+            id=dataset_pck["content"]["id"],
+            display_name=dataset_pck["content"]["name"],
+            size=(
+                ByteSize(dataset_pck["storage"]) if dataset_pck["storage"] > 0 else None
+            ),
+        )
+
     async def list_packages_in_dataset(
         self,
         api_key: str,
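
Pennsieve's dataset payloads carry a `storage` byte count; the mapping above treats a non-positive count as "size unknown". A small sketch of that rule, with payload values invented and the shape inferred from the keys used in `list_datasets`/`get_dataset` above:

from pydantic import ByteSize

raw = {"content": {"id": "N:dataset:0000", "name": "demo"}, "storage": 123_456}

size = ByteSize(raw["storage"]) if raw["storage"] > 0 else None
assert size == 123_456

# a dataset reported with zero storage is exposed as size=None, not ByteSize(0)
empty = {"content": {"id": "N:dataset:0001", "name": "empty"}, "storage": 0}
assert (ByteSize(empty["storage"]) if empty["storage"] > 0 else None) is None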

services/storage/src/simcore_service_storage/datcore_dsm.py

Lines changed: 45 additions & 2 deletions
@@ -5,6 +5,7 @@
 import arrow
 from fastapi import FastAPI
 from models_library.api_schemas_storage.storage_schemas import (
+    UNDEFINED_SIZE_TYPE,
     DatCoreCollectionName,
     DatCoreDatasetName,
     DatCorePackageName,

@@ -16,6 +17,7 @@
 from models_library.projects_nodes_io import LocationID, LocationName, StorageFileID
 from models_library.users import UserID
 from pydantic import AnyUrl, ByteSize, NonNegativeInt, TypeAdapter, ValidationError
+from servicelib.utils import limited_as_completed

 from .constants import DATCORE_ID, DATCORE_STR
 from .dsm_factory import BaseDataManager

@@ -191,12 +193,53 @@ async def compute_path_total_size(self, user_id: UserID, *, path: Path) -> ByteS
         """returns the total size of an arbitrary path"""
         api_token, api_secret = await self._get_datcore_tokens(user_id)
         api_token, api_secret = _check_api_credentials(api_token, api_secret)
+
+        # if this is a dataset we might have the size directly
+        with contextlib.suppress(ValidationError):
+            dataset_id = TypeAdapter(DatCoreDatasetName).validate_python(f"{path}")
+            _, dataset_size = await datcore_adapter.get_dataset(
+                self.app,
+                api_key=api_token,
+                api_secret=api_secret,
+                dataset_id=dataset_id,
+            )
+            if dataset_size is not None:
+                return dataset_size
+
+        # generic computation
         try:
-            paths = await self.list_paths(
-                user_id, file_filter=path, cursor=None, limit=1
+            paths, cursor, total_number = await self.list_paths(
+                user_id, file_filter=path, cursor=None, limit=50
             )
+            accumulated_size = ByteSize(0)
+
+            next_folders: list[PathMetaData] = []
+            for p in paths:
+                if p.file_meta_data is not None:
+                    # this is a file
+                    assert (
+                        p.file_meta_data.file_size is not UNDEFINED_SIZE_TYPE
+                    )  # nosec
+                    assert isinstance(p.file_meta_data.file_size, ByteSize)  # nosec
+                    accumulated_size = ByteSize(
+                        accumulated_size + p.file_meta_data.file_size
+                    )
+                else:
+                    next_folders.append(p)
+            async for sbfolder_size_future in limited_as_completed(
+                (
+                    self.compute_path_total_size(user_id, path=sub_folder.path)
+                    for sub_folder in next_folders
+                ),
+                limit=3,
+            ):
+                size = await sbfolder_size_future
+                accumulated_size = ByteSize(accumulated_size + size)
+
+            return accumulated_size
         if len(paths) == 0:
             return ByteSize(0)
+
         except ValidationError:
             # invalid path
             return ByteSize(0)
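
The fallback path walks the tree level by level: file sizes at the current level are summed directly, and sub-folders are recursed into with at most three sizing coroutines in flight per level via `limited_as_completed`. A standalone sketch of the same pattern using plain asyncio in place of the servicelib helper; the dict-based tree is a stand-in for `list_paths` results:

import asyncio

_CONCURRENCY_PER_LEVEL = 3  # mirrors limit=3 above


async def folder_size(tree: dict) -> int:
    # int values are files, dict values are sub-folders
    total = sum(v for v in tree.values() if isinstance(v, int))
    subfolders = [v for v in tree.values() if isinstance(v, dict)]

    # a fresh semaphore per level, like one limited_as_completed call per recursion
    sem = asyncio.Semaphore(_CONCURRENCY_PER_LEVEL)

    async def _bounded(sub: dict) -> int:
        async with sem:
            return await folder_size(sub)

    for future in asyncio.as_completed([_bounded(s) for s in subfolders]):
        total += await future
    return total


tree = {"a.txt": 100, "sub": {"b.bin": 2_000, "deeper": {"c.dat": 30}}}
assert asyncio.run(folder_size(tree)) == 2_130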

services/storage/src/simcore_service_storage/modules/datcore_adapter/datcore_adapter.py

Lines changed: 26 additions & 1 deletion
@@ -19,7 +19,7 @@
     DatCorePackageName,
 )
 from models_library.users import UserID
-from pydantic import AnyUrl, BaseModel, NonNegativeInt, TypeAdapter
+from pydantic import AnyUrl, BaseModel, ByteSize, NonNegativeInt, TypeAdapter
 from servicelib.fastapi.client_session import get_client_session
 from servicelib.utils import logged_gather

@@ -317,6 +317,31 @@ async def list_datasets(
     )


+async def get_dataset(
+    app: FastAPI,
+    *,
+    api_key: str,
+    api_secret: str,
+    dataset_id: DatCoreDatasetName,
+) -> tuple[DatasetMetaData, ByteSize | None]:
+    response = await request(
+        app,
+        api_key,
+        api_secret,
+        "GET",
+        f"/datasets/{dataset_id}",
+    )
+    assert isinstance(response, dict)  # nosec
+    datcore_dataset = DatCoreDatasetMetaData(**response)
+
+    return (
+        DatasetMetaData(
+            dataset_id=datcore_dataset.id, display_name=datcore_dataset.display_name
+        ),
+        datcore_dataset.size,
+    )
+
+
 async def get_file_download_presigned_link(
     app: FastAPI, api_key: str, api_secret: str, file_id: str
 ) -> AnyUrl:
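
A usage sketch of this storage-side helper, assuming an initialized FastAPI `app` whose state carries the adapter client session; the credentials and dataset id are placeholders:

from fastapi import FastAPI
from models_library.api_schemas_storage.storage_schemas import DatCoreDatasetName
from pydantic import TypeAdapter


async def print_dataset_size(app: FastAPI) -> None:
    dataset_id = TypeAdapter(DatCoreDatasetName).validate_python("N:dataset:0000")
    metadata, size = await get_dataset(
        app,
        api_key="<api-key>",        # placeholder
        api_secret="<api-secret>",  # placeholder
        dataset_id=dataset_id,
    )
    if size is not None:
        print(f"{metadata.display_name}: {size.human_readable()}")
    else:
        # this is exactly the case where compute_path_total_size falls back
        # to walking the dataset's files
        print(f"{metadata.display_name}: size not reported by Pennsieve")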
