Skip to content

Commit de0f838

Browse files
fix: last modified filter
1 parent 108f3cc commit de0f838

File tree

7 files changed

+144
-65
lines changed

7 files changed

+144
-65
lines changed

packages/models-library/src/models_library/api_schemas_webserver/storage.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1+
import datetime
12
from pathlib import Path
2-
from typing import Annotated, Final
3+
from typing import Annotated, Final, Self
34

45
from models_library.utils.common_validators import (
56
MIN_NON_WILDCARD_CHARS,
67
WILDCARD_CHARS,
78
ensure_pattern_has_enough_characters,
89
)
9-
from pydantic import BaseModel, Field
10+
from pydantic import BaseModel, Field, model_validator
1011

1112
from ..api_schemas_storage.storage_schemas import (
1213
DEFAULT_NUMBER_OF_PATHS_PER_PAGE,
@@ -16,8 +17,8 @@
1617
from ..rest_pagination import CursorQueryParameters
1718
from ._base import InputSchema
1819

19-
MAX_SEARCH_ITEMS_PER_PAGE: Final[int] = 25
20-
DEFAULT_MAX_SEARCH_ITEMS_PER_PAGE: Final[int] = 50
20+
MAX_SEARCH_ITEMS_PER_PAGE: Final[int] = 50
21+
DEFAULT_MAX_SEARCH_ITEMS_PER_PAGE: Final[int] = 25
2122

2223

2324
class StorageLocationPathParams(BaseModel):
@@ -53,18 +54,44 @@ class DataExportPost(InputSchema):
5354

5455

5556
class SearchBodyParams(InputSchema):
56-
name_pattern: Annotated[
57+
filename_pattern: Annotated[
5758
str,
5859
ensure_pattern_has_enough_characters(),
5960
Field(
60-
description=f"Name pattern with wildcard support {tuple(WILDCARD_CHARS)}. Minimum of {MIN_NON_WILDCARD_CHARS} non-wildcard characters required.",
61+
description=f"File name pattern with wildcard support {tuple(WILDCARD_CHARS)}. Minimum of {MIN_NON_WILDCARD_CHARS} non-wildcard characters required.",
6162
),
6263
]
63-
max_items_per_page: Annotated[
64+
last_modified_before: Annotated[
65+
datetime.datetime | None,
66+
Field(
67+
default=None,
68+
description="Filter results to files modified before this date (inclusive). Format: YYYY-MM-DDTHH:MM:SS",
69+
),
70+
]
71+
last_modified_after: Annotated[
72+
datetime.datetime | None,
73+
Field(
74+
default=None,
75+
description="Filter results to files modified after this date (inclusive). Format: YYYY-MM-DDTHH:MM:SS",
76+
),
77+
]
78+
items_per_page: Annotated[
6479
int,
6580
Field(
66-
description="Max number of items per page",
81+
description="Number of items per page",
6782
ge=1,
6883
le=MAX_SEARCH_ITEMS_PER_PAGE,
6984
),
7085
] = DEFAULT_MAX_SEARCH_ITEMS_PER_PAGE
86+
87+
@model_validator(mode="after")
88+
def _validate_date_range(self) -> Self:
89+
"""Ensure that last_modified_before is after last_modified_after when both are present."""
90+
if (
91+
self.last_modified_before is not None
92+
and self.last_modified_after is not None
93+
and self.last_modified_before <= self.last_modified_after
94+
):
95+
msg = "last_modified_before must be after last_modified_after"
96+
raise ValueError(msg)
97+
return self

packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/simcore_s3.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import datetime
12
from typing import Literal
23

34
from models_library.api_schemas_rpc_async_jobs.async_jobs import (
@@ -51,15 +52,19 @@ async def start_search(
5152
rabbitmq_rpc_client: RabbitMQRPCClient,
5253
*,
5354
job_filter: AsyncJobFilter,
54-
name_pattern: str,
55-
max_items_per_page: int,
55+
items_per_page: int,
56+
filename_pattern: str,
57+
last_modified_before: datetime.datetime | None = None,
58+
last_modified_after: datetime.datetime | None = None,
5659
) -> tuple[AsyncJobGet, AsyncJobFilter]:
5760
async_job_rpc_get = await submit(
5861
rabbitmq_rpc_client,
5962
rpc_namespace=STORAGE_RPC_NAMESPACE,
60-
method_name=TypeAdapter(RPCMethodName).validate_python("start_search"),
63+
method_name=TypeAdapter(RPCMethodName).validate_python("start_file_search"),
6164
job_filter=job_filter,
62-
name_pattern=name_pattern,
63-
max_items_per_page=max_items_per_page,
65+
items_per_page=items_per_page,
66+
name_pattern=filename_pattern,
67+
last_modified_before=last_modified_before,
68+
last_modified_after=last_modified_after,
6469
)
6570
return async_job_rpc_get, job_filter

services/storage/src/simcore_service_storage/api/_worker_tasks/_simcore_s3.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import datetime
12
import functools
23
import logging
34
from typing import Any
@@ -139,7 +140,9 @@ async def search(
139140
user_id: UserID,
140141
project_id: ProjectID | None,
141142
name_pattern: str,
142-
max_items_per_page: int,
143+
modified_before: datetime.datetime | None,
144+
modified_after: datetime.datetime | None,
145+
items_per_page: int,
143146
) -> None:
144147
with log_context(
145148
_logger,
@@ -157,7 +160,9 @@ async def search(
157160
user_id=user_id,
158161
project_id=project_id,
159162
name_pattern=name_pattern,
160-
items_per_page=max_items_per_page,
163+
modified_before=modified_before,
164+
modified_after=modified_after,
165+
items_per_page=items_per_page,
161166
):
162167
data = [
163168
SearchResult(

services/storage/src/simcore_service_storage/api/rpc/_simcore_s3.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ async def start_search(
7272
task_manager: TaskManager,
7373
job_filter: AsyncJobFilter,
7474
name_pattern: str,
75-
max_items_per_page: int,
75+
items_per_page: int,
7676
project_id: str | None = None,
7777
) -> AsyncJobGet:
7878
task_name = search.__name__
@@ -85,6 +85,6 @@ async def start_search(
8585
user_id=job_filter.user_id,
8686
project_id=project_id,
8787
name_pattern=name_pattern,
88-
max_items_per_page=max_items_per_page,
88+
items_per_page=items_per_page,
8989
)
9090
return AsyncJobGet(job_id=task_uuid, job_name=task_name)

services/storage/src/simcore_service_storage/simcore_s3_dsm.py

Lines changed: 62 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -989,58 +989,60 @@ async def _process_s3_page_results(
989989

990990
async def _search_project_s3_files(
991991
self,
992+
user_id: UserID,
992993
proj_id: ProjectID,
993994
filename_pattern: str,
994-
user_id: UserID,
995-
items_per_page: NonNegativeInt,
996-
) -> AsyncGenerator[list[FileMetaData], None]:
997-
"""Search S3 files in a specific project and yield results page by page."""
995+
last_modified_before: datetime.datetime | None = None,
996+
last_modified_after: datetime.datetime | None = None,
997+
) -> AsyncGenerator[FileMetaData, None]:
998+
"""Search S3 files in a specific project and yield individual results."""
998999
s3_client = get_s3_client(self.app)
9991000
min_parts_for_valid_s3_object = 2
1000-
current_page_results: list[FileMetaData] = []
10011001

10021002
try:
10031003
async for s3_objects in s3_client.list_objects_paginated(
10041004
bucket=self.simcore_bucket_name,
10051005
prefix=f"{proj_id}/",
1006-
items_per_page=items_per_page * 5, # fetch more to filter locally
1006+
items_per_page=500, # fetch larger batches for efficiency
10071007
):
10081008
for s3_obj in s3_objects:
10091009
filename = Path(s3_obj.object_key).name
10101010

1011-
if (
1011+
if not (
10121012
fnmatch.fnmatch(filename, filename_pattern)
10131013
and len(s3_obj.object_key.split("/"))
10141014
>= min_parts_for_valid_s3_object
10151015
):
1016-
file_meta = FileMetaData.from_simcore_node(
1017-
user_id=user_id,
1018-
file_id=TypeAdapter(SimcoreS3FileID).validate_python(
1019-
s3_obj.object_key
1020-
),
1021-
bucket=self.simcore_bucket_name,
1022-
location_id=self.get_location_id(),
1023-
location_name=self.get_location_name(),
1024-
sha256_checksum=None,
1025-
file_size=s3_obj.size,
1026-
last_modified=s3_obj.last_modified,
1027-
entity_tag=s3_obj.e_tag,
1028-
)
1029-
current_page_results.append(file_meta)
1016+
continue
10301017

1031-
if len(current_page_results) >= items_per_page:
1032-
processed_results = await self._process_s3_page_results(
1033-
current_page_results[:items_per_page]
1034-
)
1035-
yield processed_results
1036-
current_page_results = current_page_results[items_per_page:]
1018+
if (
1019+
last_modified_before
1020+
and s3_obj.last_modified
1021+
and s3_obj.last_modified >= last_modified_before
1022+
):
1023+
continue
1024+
1025+
if (
1026+
last_modified_after
1027+
and s3_obj.last_modified
1028+
and s3_obj.last_modified <= last_modified_after
1029+
):
1030+
continue
10371031

1038-
# Handle remaining results, ensuring we don't exceed items_per_page
1039-
while current_page_results:
1040-
batch = current_page_results[:items_per_page]
1041-
current_page_results = current_page_results[items_per_page:]
1042-
processed_results = await self._process_s3_page_results(batch)
1043-
yield processed_results
1032+
file_meta = FileMetaData.from_simcore_node(
1033+
user_id=user_id,
1034+
file_id=TypeAdapter(SimcoreS3FileID).validate_python(
1035+
s3_obj.object_key
1036+
),
1037+
bucket=self.simcore_bucket_name,
1038+
location_id=self.get_location_id(),
1039+
location_name=self.get_location_name(),
1040+
sha256_checksum=None,
1041+
file_size=s3_obj.size,
1042+
last_modified=s3_obj.last_modified,
1043+
entity_tag=s3_obj.e_tag,
1044+
)
1045+
yield file_meta
10441046

10451047
except S3KeyNotFoundError:
10461048
with log_context(
@@ -1054,6 +1056,8 @@ async def search(
10541056
*,
10551057
name_pattern: str,
10561058
project_id: ProjectID | None = None,
1059+
modified_before: datetime.datetime | None = None,
1060+
modified_after: datetime.datetime | None = None,
10571061
items_per_page: NonNegativeInt = 100,
10581062
) -> AsyncGenerator[list[FileMetaData], None]:
10591063
"""
@@ -1064,22 +1068,42 @@ async def search(
10641068
user_id: The user requesting the search
10651069
name_pattern: Wildcard pattern for filename matching (e.g., "*.txt", "test_*.json")
10661070
project_id: Optional project ID to limit search to specific project
1071+
modified_before: Optional datetime filter - only include files modified before this datetime
1072+
modified_after: Optional datetime filter - only include files modified after this datetime
10671073
items_per_page: Number of items to return per page
10681074
10691075
Yields:
1070-
List of FileMetaData objects for each page
1076+
List of FileMetaData objects for each page, with exactly items_per_page items
1077+
(except the last page which may have fewer)
10711078
"""
10721079
# Validate access rights
10731080
accessible_projects_ids = await get_accessible_project_ids(
10741081
get_db_engine(self.app), user_id=user_id, project_id=project_id
10751082
)
10761083

1077-
# Search each accessible project
1084+
# Collect all results across projects
1085+
current_page_results: list[FileMetaData] = []
1086+
10781087
for proj_id in accessible_projects_ids:
1079-
async for page_results in self._search_project_s3_files(
1080-
proj_id, name_pattern, user_id, items_per_page
1088+
async for file_result in self._search_project_s3_files(
1089+
user_id, proj_id, name_pattern, modified_before, modified_after
10811090
):
1082-
yield page_results
1091+
current_page_results.append(file_result)
1092+
1093+
if len(current_page_results) >= items_per_page:
1094+
page_batch = current_page_results[:items_per_page]
1095+
remaining_results = current_page_results[items_per_page:]
1096+
1097+
processed_page = await self._process_s3_page_results(page_batch)
1098+
yield processed_page
1099+
1100+
# NOTE: keep the remaining results for next page
1101+
current_page_results = remaining_results
1102+
1103+
# Handle any remaining results (the last page)
1104+
if current_page_results:
1105+
processed_page = await self._process_s3_page_results(current_page_results)
1106+
yield processed_page
10831107

10841108
async def create_soft_link(
10851109
self, user_id: int, target_file_id: StorageFileID, link_file_id: StorageFileID

services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17106,21 +17106,37 @@ components:
1710617106
\ - Task was aborted before completion"
1710717107
SearchBodyParams:
1710817108
properties:
17109-
namePattern:
17109+
filenamePattern:
1711017110
type: string
17111-
title: Namepattern
17112-
description: Name pattern with wildcard support ('*', '?'). Minimum of 3
17113-
non-wildcard characters required.
17114-
maxItemsPerPage:
17111+
title: Filenamepattern
17112+
description: File name pattern with wildcard support ('?', '*'). Minimum
17113+
of 3 non-wildcard characters required.
17114+
lastModifiedBefore:
17115+
anyOf:
17116+
- type: string
17117+
format: date-time
17118+
- type: 'null'
17119+
title: Lastmodifiedbefore
17120+
description: 'Filter results to files modified before this date (inclusive).
17121+
Format: YYYY-MM-DDTHH:MM:SS'
17122+
lastModifiedAfter:
17123+
anyOf:
17124+
- type: string
17125+
format: date-time
17126+
- type: 'null'
17127+
title: Lastmodifiedafter
17128+
description: 'Filter results to files modified after this date (inclusive).
17129+
Format: YYYY-MM-DDTHH:MM:SS'
17130+
itemsPerPage:
1711517131
type: integer
1711617132
maximum: 50
1711717133
minimum: 1
17118-
title: Maxitemsperpage
17119-
description: Max number of items per page
17134+
title: Itemsperpage
17135+
description: Number of items per page
1712017136
default: 25
1712117137
type: object
1712217138
required:
17123-
- namePattern
17139+
- filenamePattern
1712417140
title: SearchBodyParams
1712517141
SelectBox:
1712617142
properties:

services/web/server/src/simcore_service_webserver/storage/_rest.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -569,8 +569,10 @@ class _PathParams(BaseModel):
569569
user_id=_req_ctx.user_id,
570570
product_name=_req_ctx.product_name,
571571
),
572-
name_pattern=search_body.name_pattern,
573-
max_items_per_page=search_body.max_items_per_page,
572+
filename_pattern=search_body.filename_pattern,
573+
last_modified_before=search_body.last_modified_before,
574+
last_modified_after=search_body.last_modified_after,
575+
items_per_page=search_body.items_per_page,
574576
)
575577
_job_id = f"{async_job_rpc_get.job_id}"
576578
return create_data_response(

0 commit comments

Comments (0)