Skip to content

Commit 986f663

Browse files
authored
🐛 Fix: file uploads due to bad path encoding (#6287)
1 parent 5ffecf1 commit 986f663

File tree

3 files changed

+101
-30
lines changed

3 files changed

+101
-30
lines changed

services/web/server/src/simcore_service_webserver/storage/_handlers.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -37,30 +37,45 @@
3737

3838
def _get_base_storage_url(app: web.Application) -> URL:
3939
settings: StorageSettings = get_plugin_settings(app)
40-
41-
# storage service API endpoint
42-
return URL(settings.base_url)
40+
return URL(settings.base_url, encoded=True)
4341

4442

45-
def _get_storage_prefix(app: web.Application) -> str:
43+
def _get_storage_vtag(app: web.Application) -> str:
4644
settings: StorageSettings = get_plugin_settings(app)
4745
storage_prefix: str = settings.STORAGE_VTAG
4846
return storage_prefix
4947

5048

51-
def _resolve_storage_url(request: web.Request) -> URL:
52-
"""Composes a new url against storage API"""
49+
def _to_storage_url(request: web.Request) -> URL:
50+
"""Converts web-api url to storage-api url"""
5351
userid = request[RQT_USERID_KEY]
5452

5553
# storage service API endpoint
56-
endpoint = _get_base_storage_url(request.app)
54+
url = _get_base_storage_url(request.app)
5755

5856
basepath_index = 3
5957
# strip basepath from webserver API path (i.e. webserver api version)
6058
# >>> URL('http://storage:1234/v5/storage/asdf/').raw_parts[3:]
6159
suffix = "/".join(request.url.raw_parts[basepath_index:])
6260

63-
return (endpoint / suffix).with_query(request.query).update_query(user_id=userid)
61+
return (
62+
url.joinpath(suffix, encoded=True)
63+
.with_query(request.query)
64+
.update_query(user_id=userid)
65+
)
66+
67+
68+
def _from_storage_url(request: web.Request, storage_url: AnyUrl) -> AnyUrl:
69+
"""Converts storage-api url to web-api url"""
70+
assert storage_url.path # nosec
71+
72+
prefix = f"/{_get_storage_vtag(request.app)}"
73+
converted_url = request.url.with_path(
74+
f"/v0/storage{storage_url.path.removeprefix(prefix)}", encoded=True
75+
).with_scheme(request.headers.get(X_FORWARDED_PROTO, request.url.scheme))
76+
77+
webserver_url: AnyUrl = parse_obj_as(AnyUrl, f"{converted_url}")
78+
return webserver_url
6479

6580

6681
class _ResponseTuple(NamedTuple):
@@ -71,7 +86,7 @@ class _ResponseTuple(NamedTuple):
7186
async def _forward_request_to_storage(
7287
request: web.Request, method: str, body: dict[str, Any] | None = None, **kwargs
7388
) -> _ResponseTuple:
74-
url = _resolve_storage_url(request)
89+
url = _to_storage_url(request)
7590
session = get_client_session(request.app)
7691

7792
async with session.request(
@@ -81,16 +96,6 @@ async def _forward_request_to_storage(
8196
return _ResponseTuple(payload=payload, status_code=resp.status)
8297

8398

84-
def _unresolve_storage_url(request: web.Request, storage_url: AnyUrl) -> AnyUrl:
85-
assert storage_url.path # nosec
86-
prefix = f"/{_get_storage_prefix(request.app)}"
87-
converted_url = request.url.with_path(
88-
f"/v0/storage{storage_url.path.removeprefix(prefix)}"
89-
).with_scheme(request.headers.get(X_FORWARDED_PROTO, request.url.scheme))
90-
converted_url_: AnyUrl = parse_obj_as(AnyUrl, f"{converted_url}")
91-
return converted_url_
92-
93-
9499
# ---------------------------------------------------------------------
95100

96101
routes = web.RouteTableDef()
@@ -233,10 +238,10 @@ class _QueryParams(BaseModel):
233238
payload, status = await _forward_request_to_storage(request, "PUT", body=None)
234239
data, _ = unwrap_envelope(payload)
235240
file_upload_schema = FileUploadSchema.parse_obj(data)
236-
file_upload_schema.links.complete_upload = _unresolve_storage_url(
241+
file_upload_schema.links.complete_upload = _from_storage_url(
237242
request, file_upload_schema.links.complete_upload
238243
)
239-
file_upload_schema.links.abort_upload = _unresolve_storage_url(
244+
file_upload_schema.links.abort_upload = _from_storage_url(
240245
request, file_upload_schema.links.abort_upload
241246
)
242247
return create_data_response(jsonable_encoder(file_upload_schema), status=status)
@@ -261,7 +266,7 @@ class _PathParams(BaseModel):
261266
)
262267
data, _ = unwrap_envelope(payload)
263268
file_upload_complete = FileUploadCompleteResponse.parse_obj(data)
264-
file_upload_complete.links.state = _unresolve_storage_url(
269+
file_upload_complete.links.state = _from_storage_url(
265270
request, file_upload_complete.links.state
266271
)
267272
return create_data_response(jsonable_encoder(file_upload_complete), status=status)

services/web/server/tests/unit/isolated/test_utils.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,33 @@
1818

1919

2020
def test_yarl_new_url_generation():
21-
api_endpoint = URL("http://director:8001/v0")
21+
base_url_with_autoencode = URL("http://director:8001/v0", encoded=False)
2222
service_key = "simcore/services/dynamic/smash"
2323
service_version = "1.0.3"
2424

25+
# NOTE: careful, first we need to encode the "/" in this file path.
26+
# For that we need safe="" option
27+
assert urllib.parse.quote("/") == "/"
28+
assert urllib.parse.quote("/", safe="") == "%2F"
29+
assert urllib.parse.quote("%2F", safe="") == "%252F"
30+
2531
quoted_service_key = urllib.parse.quote(service_key, safe="")
2632

2733
# Since 1.6.x composition using '/' creates URLs with auto-encoding enabled by default
2834
assert (
29-
(str(api_endpoint / "services" / quoted_service_key / service_version))
35+
str(
36+
base_url_with_autoencode / "services" / quoted_service_key / service_version
37+
)
3038
== "http://director:8001/v0/services/simcore%252Fservices%252Fdynamic%252Fsmash/1.0.3"
3139
)
3240

3341
# Passing encoded=True parameter prevents URL auto-encoding, user is responsible about URL correctness
34-
url = URL(
42+
url_without_autoencode = URL(
3543
f"http://director:8001/v0/services/{quoted_service_key}/1.0.3", encoded=True
3644
)
3745

3846
assert (
39-
(str(url))
47+
str(url_without_autoencode)
4048
== "http://director:8001/v0/services/simcore%2Fservices%2Fdynamic%2Fsmash/1.0.3"
4149
)
4250

services/web/server/tests/unit/with_dbs/03/test_storage_handlers.py

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,13 @@
55

66

77
import json
8+
import urllib.parse
89
from typing import Any
910

1011
import pytest
11-
from aiohttp.test_utils import TestClient
12+
from aiohttp import web
13+
from aiohttp.test_utils import TestClient, make_mocked_request
14+
from faker import Faker
1215
from models_library.api_schemas_storage import (
1316
FileUploadCompleteResponse,
1417
FileUploadLinks,
@@ -20,16 +23,26 @@
2023
from pytest_simcore.helpers.typing_env import EnvVarsDict
2124
from pytest_simcore.helpers.webserver_login import UserInfoDict
2225
from servicelib.aiohttp.rest_responses import wrap_as_envelope
26+
from servicelib.request_keys import RQT_USERID_KEY
2327
from simcore_postgres_database.models.users import UserRole
28+
from simcore_service_webserver.application_settings import setup_settings
29+
from simcore_service_webserver.storage._handlers import (
30+
_from_storage_url,
31+
_to_storage_url,
32+
)
33+
from yarl import URL
2434

2535

2636
@pytest.fixture
27-
def app_environment(app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch):
37+
def app_environment(
38+
app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch
39+
) -> EnvVarsDict:
2840
return app_environment | setenvs_from_dict(
2941
monkeypatch,
3042
{
3143
"WEBSERVER_DB_LISTENER": "0",
3244
"WEBSERVER_GARBAGE_COLLECTOR": "null",
45+
"STORAGE_HOST": "fake-storage",
3346
},
3447
)
3548

@@ -49,7 +62,7 @@ def _resolve(*args, **kwargs) -> AnyUrl:
4962
return parse_obj_as(AnyUrl, "http://private-url")
5063

5164
mocker.patch(
52-
"simcore_service_webserver.storage._handlers._unresolve_storage_url",
65+
"simcore_service_webserver.storage._handlers._from_storage_url",
5366
autospec=True,
5467
side_effect=_resolve,
5568
)
@@ -88,7 +101,7 @@ def _resolve(*args, **kwargs) -> AnyUrl:
88101

89102
@pytest.mark.parametrize("user_role", [UserRole.USER])
90103
@pytest.mark.parametrize(
91-
"file_id", [DOUBLE_ENCODE_SLASH_IN_FILE_ID, SINGLE_ENCODE_SLASH_IN_FILE_ID]
104+
"file_id", [SINGLE_ENCODE_SLASH_IN_FILE_ID, DOUBLE_ENCODE_SLASH_IN_FILE_ID]
92105
)
93106
@pytest.mark.parametrize(
94107
"method, path, body, expected_response",
@@ -159,3 +172,48 @@ async def test_openapi_regression_test(
159172
decoded_response = await response.json()
160173
assert decoded_response["error"] is None
161174
assert decoded_response["data"] is not None
175+
176+
177+
def test_url_storage_resolver_helpers(faker: Faker, app_environment: EnvVarsDict):
178+
179+
app = web.Application()
180+
setup_settings(app)
181+
182+
# NOTE: careful, first we need to encode the "/" in this file path.
183+
# For that we need safe="" option
184+
assert urllib.parse.quote("/") == "/"
185+
assert urllib.parse.quote("/", safe="") == "%2F"
186+
assert urllib.parse.quote("%2F", safe="") == "%252F"
187+
188+
file_id = urllib.parse.quote(f"{faker.uuid4()}/{faker.uuid4()}/file.py", safe="")
189+
assert "%2F" in file_id
190+
assert "%252F" not in file_id
191+
192+
url = URL(f"/v0/storage/locations/0/files/{file_id}:complete", encoded=True)
193+
assert url.raw_parts[-1] == f"{file_id}:complete"
194+
195+
web_request = make_mocked_request("GET", str(url), app=app)
196+
web_request[RQT_USERID_KEY] = faker.pyint()
197+
198+
# web -> storage
199+
storage_url = _to_storage_url(web_request)
200+
# Something like
201+
# http://storage:123/v5/locations/0/files/e3e70...c07cd%2Ff7...55%2Ffile.py:complete?user_id=8376
202+
203+
assert storage_url.raw_parts[-1] == web_request.url.raw_parts[-1]
204+
205+
assert storage_url.host == app_environment["STORAGE_HOST"]
206+
assert storage_url.port == int(app_environment["STORAGE_PORT"])
207+
assert storage_url.query["user_id"] == str(web_request[RQT_USERID_KEY])
208+
209+
# storage -> web
210+
web_url: AnyUrl = _from_storage_url(
211+
web_request, parse_obj_as(AnyUrl, str(storage_url))
212+
)
213+
214+
assert storage_url.host != web_url.host
215+
assert storage_url.port != web_url.port
216+
217+
assert isinstance(storage_url, URL) # this is a bit inconvenient
218+
assert isinstance(web_url, AnyUrl)
219+
assert str(web_url) == str(web_request.url)

0 commit comments

Comments
 (0)