Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
import datetime
from collections.abc import AsyncGenerator
from pathlib import Path
from typing import TypeAlias
from typing import Annotated, TypeAlias

import sqlalchemy as sa
from models_library.basic_types import SHA256Str
from models_library.projects import ProjectID
from models_library.projects_nodes_io import NodeID, SimcoreS3FileID
from models_library.users import UserID
from models_library.utils.fastapi_encoders import jsonable_encoder
from pydantic import BaseModel
from pydantic import BaseModel, Field, validate_call
from simcore_postgres_database.storage_models import file_meta_data
from simcore_postgres_database.utils_repos import (
pass_or_acquire_connection,
Expand All @@ -35,16 +35,15 @@


class _PathsCursorParameters(BaseModel):
# NOTE: this is a cursor: do not add fields that can grow unbounded, because the cursor is passed through REST APIs and similar transports
offset: int
file_prefix: Path | None
project_ids: list[ProjectID] | None
partial: bool


def _init_pagination(
cursor: GenericCursor | None,
*,
filter_by_project_ids: list[ProjectID] | None,
filter_by_file_prefix: Path | None,
is_partial_prefix: bool,
) -> _PathsCursorParameters:
Expand All @@ -53,7 +52,6 @@ def _init_pagination(
return _PathsCursorParameters(
offset=0,
file_prefix=filter_by_file_prefix,
project_ids=filter_by_project_ids,
partial=is_partial_prefix,
)

Expand Down Expand Up @@ -229,23 +227,28 @@ async def try_get_directory(
return None
return None

@validate_call(config={"arbitrary_types_allowed": True})
async def list_child_paths(
self,
*,
connection: AsyncConnection | None = None,
filter_by_project_ids: list[ProjectID] | None,
filter_by_project_ids: Annotated[
list[ProjectID] | None, Field(max_length=10000)
],
filter_by_file_prefix: Path | None,
cursor: GenericCursor | None,
limit: int,
is_partial_prefix: bool,
) -> tuple[list[PathMetaData], GenericCursor | None, TotalChildren]:
"""returns a list of FileMetaDataAtDB that are one level deep.
e.g. when no filter is used, these are top level objects

NOTE: filter_by_project_ids is limited to 10000 entries; a longer list raises a ValidationError, and then someone needs to fix this,
maybe by using a DB join instead of passing the project IDs inline.
"""

cursor_params = _init_pagination(
cursor,
filter_by_project_ids=filter_by_project_ids,
filter_by_file_prefix=filter_by_file_prefix,
is_partial_prefix=is_partial_prefix,
)
Expand Down Expand Up @@ -278,9 +281,9 @@ async def list_child_paths(
file_meta_data.c.file_id.like(search_prefix),
(
file_meta_data.c.project_id.in_(
[f"{_}" for _ in cursor_params.project_ids]
[f"{_}" for _ in filter_by_project_ids]
)
if cursor_params.project_ids
if filter_by_project_ids
else True
),
)
Expand All @@ -303,9 +306,9 @@ async def list_child_paths(
)
.where(
file_meta_data.c.project_id.in_(
[f"{_}" for _ in cursor_params.project_ids]
[f"{_}" for _ in filter_by_project_ids]
)
if cursor_params.project_ids
if filter_by_project_ids
else True
)
.cte("ranked_files")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from contextlib import suppress
from pathlib import Path
from typing import TypeAlias
Expand Down Expand Up @@ -31,6 +32,8 @@
from ..modules.db.projects import ProjectRepository
from .utils import convert_db_to_model

_logger = logging.getLogger(__name__)


async def _list_all_files_in_folder(
*,
Expand Down Expand Up @@ -250,6 +253,11 @@ async def list_child_paths_from_s3(
"""
objects_cursor = None
if cursor is not None:
_logger.debug(
"Using cursor for listing child paths in S3 for filter '%s': %s",
file_filter,
cursor,
)
cursor_params = json_loads(cursor)
assert cursor_params["file_filter"] == f"{file_filter}" # nosec
objects_cursor = cursor_params["objects_next_cursor"]
Expand Down Expand Up @@ -277,6 +285,11 @@ async def list_child_paths_from_s3(
]
next_cursor = None
if objects_next_cursor:
_logger.debug(
"Next cursor for listing child paths in S3 for filter '%s': %s",
file_filter,
objects_next_cursor,
)
next_cursor = json_dumps(
{
"file_filter": f"{file_filter}",
Expand Down
Loading