Skip to content

Commit e4d7958

Browse files
authored
Properly escape multi-segment path parameters (#596)
## Changes Ports databricks/databricks-sdk-go#869 to Python SDK. Subsumes #592. Currently, path parameters are directly interpolated into the request URL without escaping. This means that characters like /, ? and # will not be percent-encoded and will affect the semantics of the URL, starting a new path segment, query parameters, or fragment, respectively. This means that it is impossible for users of the Files API to upload/download objects that contain ? or # in their name. / is allowed in the path of the Files API, so it does not need to be escaped. The Files API is currently marked with x-databricks-multi-segment, indicating that it should be permitted to have / characters but other characters need to be percent-encoded. This PR implements this. ## Tests - [x] Unit test for multi-segment path escaping behavior. - [x] Updated integration test to use # and ? symbols in the file name. This failed on main but succeeded on this PR.
1 parent 7ae30e3 commit e4d7958

File tree

5 files changed

+68
-19
lines changed

5 files changed

+68
-19
lines changed

.codegen/service.py.tmpl

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import time
99
import random
1010
import logging
1111
from ..errors import OperationTimeout, OperationFailed
12-
from ._internal import _enum, _from_dict, _repeated_dict, _repeated_enum, Wait
12+
from ._internal import _enum, _from_dict, _repeated_dict, _repeated_enum, Wait, _escape_multi_segment_path_parameter
1313

1414
_LOG = logging.getLogger('databricks.sdk')
1515

@@ -325,9 +325,7 @@ class {{.PascalName}}API:{{if .Description}}
325325

326326
{{define "method-do" -}}
327327
self._api.do('{{.Verb}}',
328-
{{if .PathParts -}}
329-
f'{{range .PathParts}}{{.Prefix}}{{if .Field}}{{"{"}}{{template "safe-snake-name" .Field}}{{with .Field.Entity.Enum}}.value{{end}}{{"}"}}{{else if .IsAccountId}}{{"{self._api.account_id}"}}{{end}}{{ end }}'
330-
{{- else}}'{{.Path}}'{{end}}
328+
{{ template "path" . }}
331329
{{if .Request}}
332330
{{- if .Request.HasQueryField}}, query=query{{end}}
333331
{{- if .Request.MapValue}}, body=contents
@@ -341,6 +339,31 @@ self._api.do('{{.Verb}}',
341339
{{- if .IsResponseByteStream }}, raw=True{{ end }})
342340
{{- end}}
343341

342+
{{- define "path" -}}
343+
{{- if .PathParts -}}
344+
f'{{range .PathParts -}}
345+
{{- .Prefix -}}
346+
{{- if .Field -}}
347+
{{- "{" -}}
348+
{{- if .Field.IsPathMultiSegment -}}_escape_multi_segment_path_parameter({{ template "path-parameter" . }})
349+
{{- else -}}{{ template "path-parameter" . }}
350+
{{- end -}}
351+
{{- "}" -}}
352+
{{- else if .IsAccountId}}
353+
{{- "{" -}}
354+
self._api.account_id
355+
{{- "}" -}}
356+
{{- end -}}
357+
{{- end }}'
358+
{{- else -}}
359+
'{{.Path}}'
360+
{{- end -}}
361+
{{- end -}}
362+
363+
{{- define "path-parameter" -}}
364+
{{template "safe-snake-name" .Field}}{{with .Field.Entity.Enum}}.value{{end}}
365+
{{- end -}}
366+
344367
{{define "method-return-type" -}}
345368
{{if and .Wait (and (not .IsCrudRead) (not (eq .SnakeName "get_run"))) }} -> Wait[{{.Wait.Poll.Response.PascalName}}]
346369
{{- else if not .Response.IsEmpty }} -> {{if .Response.ArrayValue -}}

databricks/sdk/service/_internal.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import datetime
2+
import urllib.parse
23
from typing import Callable, Dict, Generic, Optional, Type, TypeVar
34

45

@@ -38,6 +39,10 @@ def _repeated_enum(d: Dict[str, any], field: str, cls: Type) -> any:
3839
return res
3940

4041

42+
def _escape_multi_segment_path_parameter(param: str) -> str:
43+
return urllib.parse.quote(param)
44+
45+
4146
ReturnType = TypeVar('ReturnType')
4247

4348

databricks/sdk/service/files.py

Lines changed: 25 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/integration/test_files.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def test_files_api_upload_download(ucws, random):
219219
with ResourceWithCleanup.create_schema(w, 'main', schema):
220220
with ResourceWithCleanup.create_volume(w, 'main', schema, volume):
221221
f = io.BytesIO(b"some text data")
222-
target_file = f'/Volumes/main/{schema}/{volume}/filesit-{random()}.txt'
222+
target_file = f'/Volumes/main/{schema}/{volume}/filesit-with-?-and-#-{random()}.txt'
223223
w.files.upload(target_file, f)
224224
with w.files.download(target_file).contents as f:
225225
assert f.read() == b"some text data"

tests/test_internal.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from dataclasses import dataclass
22
from enum import Enum
33

4-
from databricks.sdk.service._internal import (_enum, _from_dict,
5-
_repeated_dict, _repeated_enum)
4+
from databricks.sdk.service._internal import (
5+
_enum, _escape_multi_segment_path_parameter, _from_dict, _repeated_dict,
6+
_repeated_enum)
67

78

89
class A(Enum):
@@ -41,3 +42,10 @@ def test_from_dict():
4142

4243
def test_repeated_dict():
4344
assert _repeated_dict({'x': [{'field': 'a'}, {'field': 'c'}]}, 'x', B) == [B('a'), B('c')]
45+
46+
47+
def test_escape_multi_segment_path_parameter():
48+
assert _escape_multi_segment_path_parameter('a b') == 'a%20b'
49+
assert _escape_multi_segment_path_parameter('a/b') == 'a/b'
50+
assert _escape_multi_segment_path_parameter('a?b') == 'a%3Fb'
51+
assert _escape_multi_segment_path_parameter('a#b') == 'a%23b'

0 commit comments

Comments
 (0)