Skip to content

Commit 0e9c52d

Browse files
authored
🎨Storage: display path field is url encoded by parts (#7285)
1 parent 5c9e45b commit 0e9c52d

File tree

4 files changed

+157
-8
lines changed

4 files changed

+157
-8
lines changed

packages/models-library/src/models_library/api_schemas_storage/storage_schemas.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,10 @@ class SoftCopyBody(BaseModel):
407407
class PathMetaDataGet(BaseModel):
408408
path: Annotated[Path, Field(description="the path to the current path")]
409409
display_path: Annotated[
410-
Path, Field(description="the path to display with UUID replaced")
410+
Path,
411+
Field(
412+
description="the path to display with UUID replaced (URL Encoded by parts as names may contain '/')"
413+
),
411414
]
412415

413416
file_meta_data: Annotated[

services/storage/src/simcore_service_storage/models.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,12 @@ def none(cls) -> "AccessRights":
353353

354354
class PathMetaData(BaseModel):
355355
path: Path
356-
display_path: Path
356+
display_path: Annotated[
357+
Path,
358+
Field(
359+
description="Path with names instead of IDs (URL Encoded by parts as names may contain '/')"
360+
),
361+
]
357362
location_id: LocationID
358363
location: LocationName
359364
bucket_name: str
@@ -369,7 +374,7 @@ class PathMetaData(BaseModel):
369374
def update_display_fields(self, id_name_mapping: dict[str, str]) -> None:
370375
display_path = f"{self.path}"
371376
for old, new in id_name_mapping.items():
372-
display_path = display_path.replace(old, new)
377+
display_path = display_path.replace(old, urllib.parse.quote(new, safe=""))
373378
self.display_path = Path(display_path)
374379

375380
if self.file_meta_data:

services/storage/tests/conftest.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -562,14 +562,22 @@ async def _create_content(
562562

563563
# Create subdirectories
564564
s3_base_path = Path(f"{project_id}") / f"{node_id}" / dir_name
565-
s3_subdirs = [s3_base_path / f"sub-dir-{i}" for i in range(subdir_count)]
565+
# NOTE: add a space in the sub directory
566+
s3_subdirs = [
567+
s3_base_path / f"sub-dir_ect ory-{i}" for i in range(subdir_count)
568+
]
566569
# Randomly distribute files across subdirectories
567570
selected_subdirs = random.choices(s3_subdirs, k=file_count) # noqa: S311
568571
# Upload to S3
569572
with log_context(
570573
logging.INFO,
571574
msg=f"Uploading {file_count} files to S3 (each {file_size_in_dir.human_readable()}, total: {ByteSize(file_count * file_size_in_dir).human_readable()})",
572575
):
576+
# we ensure the file name contain a space
577+
def _file_name_with_space():
578+
file_name = faker.unique.file_name()
579+
return f"{file_name[:1]} {file_name[1:]}"
580+
573581
results = await asyncio.gather(
574582
*(
575583
_upload_file_to_s3(
@@ -578,7 +586,7 @@ async def _create_content(
578586
s3_bucket=storage_s3_bucket,
579587
local_file=local_file,
580588
file_id=TypeAdapter(SimcoreS3FileID).validate_python(
581-
f"{selected_subdir / faker.unique.file_name()}"
589+
f"{selected_subdir / _file_name_with_space()}"
582590
),
583591
)
584592
for selected_subdir in selected_subdirs
@@ -847,7 +855,6 @@ async def with_random_project_with_files(
847855
],
848856
],
849857
project_params: ProjectWithFilesParams,
850-
faker: Faker,
851858
) -> tuple[dict[str, Any], dict[NodeID, dict[SimcoreS3FileID, FileIDDict]],]:
852859
return await random_project_with_files(project_params)
853860

services/storage/tests/unit/test_handlers_paths.py

Lines changed: 136 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111
from collections.abc import Awaitable, Callable
1212
from pathlib import Path
1313
from typing import Any, TypeAlias
14+
from urllib.parse import quote
1415

1516
import httpx
1617
import pytest
17-
from faker import Faker
18+
import sqlalchemy as sa
1819
from fastapi import FastAPI, status
1920
from fastapi_pagination.cursor import CursorPage
2021
from models_library.api_schemas_storage.storage_schemas import PathMetaDataGet
@@ -24,6 +25,8 @@
2425
from pytest_simcore.helpers.fastapi import url_from_operation_id
2526
from pytest_simcore.helpers.httpx_assert_checks import assert_status
2627
from pytest_simcore.helpers.storage_utils import FileIDDict, ProjectWithFilesParams
28+
from simcore_postgres_database.models.projects import projects
29+
from sqlalchemy.ext.asyncio import AsyncEngine
2730

2831
pytest_simcore_core_services_selection = ["postgres"]
2932
pytest_simcore_ops_services_selection = ["adminer"]
@@ -143,7 +146,6 @@ async def test_list_paths_pagination(
143146
dict[str, Any],
144147
dict[NodeID, dict[SimcoreS3FileID, FileIDDict]],
145148
],
146-
faker: Faker,
147149
):
148150
project, list_of_files = with_random_project_with_files
149151
num_nodes = len(list(project["workbench"]))
@@ -361,3 +363,135 @@ async def test_list_paths(
361363
expected_paths=expected_paths,
362364
check_total=False,
363365
)
366+
367+
368+
@pytest.mark.parametrize(
369+
"project_params",
370+
[
371+
ProjectWithFilesParams(
372+
num_nodes=1,
373+
allowed_file_sizes=(TypeAdapter(ByteSize).validate_python("0b"),),
374+
workspace_files_count=10,
375+
)
376+
],
377+
ids=str,
378+
)
379+
async def test_list_paths_with_display_name_containing_slashes(
380+
initialized_app: FastAPI,
381+
client: httpx.AsyncClient,
382+
location_id: LocationID,
383+
user_id: UserID,
384+
with_random_project_with_files: tuple[
385+
dict[str, Any],
386+
dict[NodeID, dict[SimcoreS3FileID, FileIDDict]],
387+
],
388+
sqlalchemy_async_engine: AsyncEngine,
389+
):
390+
project, list_of_files = with_random_project_with_files
391+
project_name_with_slashes = "soméà$èq¨thing with/ slas/h/es/"
392+
node_name_with_non_ascii = "my node / is not ascii: éàèù"
393+
# adjust project to contain "difficult" characters
394+
async with sqlalchemy_async_engine.begin() as conn:
395+
result = await conn.execute(
396+
sa.update(projects)
397+
.where(projects.c.uuid == project["uuid"])
398+
.values(name=project_name_with_slashes)
399+
.returning(sa.literal_column(f"{projects.c.name}, {projects.c.workbench}"))
400+
)
401+
row = result.one()
402+
assert row.name == project_name_with_slashes
403+
project_workbench = row.workbench
404+
assert len(project_workbench) == 1
405+
node = next(iter(project_workbench.values()))
406+
node["label"] = node_name_with_non_ascii
407+
result = await conn.execute(
408+
sa.update(projects)
409+
.where(projects.c.uuid == project["uuid"])
410+
.values(workbench=project_workbench)
411+
.returning(sa.literal_column(f"{projects.c.name}, {projects.c.workbench}"))
412+
)
413+
row = result.one()
414+
415+
# ls the root
416+
file_filter = None
417+
expected_paths = [(Path(project["uuid"]), False)]
418+
419+
page_of_paths = await _assert_list_paths(
420+
initialized_app,
421+
client,
422+
location_id,
423+
user_id,
424+
file_filter=file_filter,
425+
expected_paths=expected_paths,
426+
)
427+
428+
assert page_of_paths.items[0].display_path == Path(
429+
quote(project_name_with_slashes, safe="")
430+
), "display path parts should be url encoded"
431+
432+
# ls the nodes to ensure / is still there between project and node
433+
file_filter = Path(project["uuid"])
434+
expected_paths = sorted(
435+
((file_filter / node_key, False) for node_key in project["workbench"]),
436+
key=lambda x: x[0],
437+
)
438+
assert len(expected_paths) == 1, "test configuration problem"
439+
page_of_paths = await _assert_list_paths(
440+
initialized_app,
441+
client,
442+
location_id,
443+
user_id,
444+
file_filter=file_filter,
445+
expected_paths=expected_paths,
446+
)
447+
assert page_of_paths.items[0].display_path == Path(
448+
quote(project_name_with_slashes, safe="")
449+
) / quote(
450+
node_name_with_non_ascii, safe=""
451+
), "display path parts should be url encoded"
452+
453+
# ls in the node workspace
454+
selected_node_id = NodeID(random.choice(list(project["workbench"]))) # noqa: S311
455+
selected_node_s3_keys = [
456+
Path(s3_object_id) for s3_object_id in list_of_files[selected_node_id]
457+
]
458+
workspace_file_filter = file_filter / f"{selected_node_id}" / "workspace"
459+
expected_paths = _filter_and_group_paths_one_level_deeper(
460+
selected_node_s3_keys, workspace_file_filter
461+
)
462+
await _assert_list_paths(
463+
initialized_app,
464+
client,
465+
location_id,
466+
user_id,
467+
file_filter=workspace_file_filter,
468+
expected_paths=expected_paths,
469+
check_total=False,
470+
)
471+
472+
# ls in until we get to some files
473+
while selected_subfolders := [p for p in expected_paths if p[1] is False]:
474+
selected_path_filter = random.choice(selected_subfolders) # noqa: S311
475+
expected_paths = _filter_and_group_paths_one_level_deeper(
476+
selected_node_s3_keys, selected_path_filter[0]
477+
)
478+
page_of_paths = await _assert_list_paths(
479+
initialized_app,
480+
client,
481+
location_id,
482+
user_id,
483+
file_filter=selected_path_filter[0],
484+
expected_paths=expected_paths,
485+
check_total=False,
486+
)
487+
488+
expected_display_path = "/".join(
489+
[
490+
quote(project_name_with_slashes, safe=""),
491+
quote(node_name_with_non_ascii, safe=""),
492+
*(expected_paths[0][0].parts[2:]),
493+
],
494+
)
495+
assert page_of_paths.items[0].display_path == Path(
496+
expected_display_path
497+
), "display path parts should be url encoded"

0 commit comments

Comments
 (0)