Skip to content

Commit bd366c0

Browse files
authored
Properly escape multi-segment path parameters (#252)
## Changes Ports databricks/databricks-sdk-go#869 to Java SDK. Currently, path parameters are directly interpolated into the request URL without escaping. This means that characters like `/`, `?` and `#` will not be percent-encoded and will affect the semantics of the URL, starting a new path segment, query parameters, or fragment, respectively. This means that it is impossible for users of the Files API to upload/download objects that contain `?` or `#` in their name. `/` is allowed in the path of the Files API, so it does not need to be escaped. The Files API is currently marked with `x-databricks-multi-segment`, indicating that it should be permitted to have `/` characters but other characters need to be percent-encoded. This PR implements this. ## Tests - [x] Unit test for multi-segment path escaping behavior. - [x] Updated integration test to use # and ? symbols in the file name.
1 parent de70b8b commit bd366c0

File tree

5 files changed

+164
-12
lines changed

5 files changed

+164
-12
lines changed

.codegen/impl.java.tmpl

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import java.util.HashMap;
99

1010
import com.databricks.sdk.core.ApiClient;
1111
import com.databricks.sdk.core.DatabricksException;
12+
import com.databricks.sdk.core.http.Encoding;
1213
import com.databricks.sdk.support.Generated;
1314

1415
{{range .Package.ImportedEntities}}
@@ -25,9 +26,7 @@ class {{.PascalName}}Impl implements {{.PascalName}}Service {
2526
{{range .Methods}}
2627
@Override
2728
public {{if not .Response.IsEmpty -}}{{template "type" .Response}}{{else}}void{{end}} {{.CamelName}}{{if .IsNameReserved}}Content{{end}}({{if .Request}}{{template "type" .Request}} request{{end}}) {
28-
String path = {{if .PathParts -}}
29-
String.format("{{range .PathParts}}{{.Prefix}}{{if or .Field .IsAccountId}}%s{{end}}{{ end }}"{{ range .PathParts }}{{if .Field}}, request.get{{.Field.PascalName}}(){{ else if .IsAccountId }}, apiClient.configuredAccountID(){{end}}{{ end }})
30-
{{- else}}"{{.Path}}"{{end}};
29+
String path = {{ template "path" . }};
3130
{{ template "headers" . -}}
3231
{{ if .Response.IsEmpty -}}
3332
{{ template "api-call" . }}
@@ -39,6 +38,23 @@ class {{.PascalName}}Impl implements {{.PascalName}}Service {
3938
{{end}}
4039
}
4140

41+
{{- define "path" -}}
42+
{{- if .PathParts -}}
43+
String.format("{{range .PathParts -}}
44+
{{- .Prefix -}}
45+
{{- if or .Field .IsAccountId -}}%s{{- end -}}
46+
{{- end -}}"
47+
{{- range .PathParts -}}
48+
{{- if and .Field .Field.IsPathMultiSegment -}}, Encoding.encodeMultiSegmentPathParameter(request.get{{.Field.PascalName}}())
49+
{{- else if .Field -}}, request.get{{.Field.PascalName}}()
50+
{{- else if .IsAccountId -}}, apiClient.configuredAccountID()
51+
{{- end -}}
52+
{{- end -}})
53+
{{- else -}}
54+
"{{.Path}}"
55+
{{- end -}}
56+
{{- end -}}
57+
4258
{{ define "api-call" }}
4359
apiClient.{{.Verb}}(path
4460
{{- if .Request}}, {{ template "request-param" .}}{{end}}
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
package com.databricks.sdk.core.http;
2+
3+
import java.nio.ByteBuffer;
4+
import java.nio.charset.Charset;
5+
import java.nio.charset.StandardCharsets;
6+
import java.util.BitSet;
7+
8+
/**
9+
* Utility class for encoding strings for use in URLs.
10+
*
11+
* <p>Adapted from URLEncodingUtils.java from Apache's HttpClient library.
12+
*/
13+
public class Encoding {
14+
15+
/**
16+
* Unreserved characters, i.e. alphanumeric, plus: {@code _ - ! . ~ ' ( ) *}
17+
*
18+
* <p>This list is the same as the {@code unreserved} list in <a
19+
* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
20+
*/
21+
private static final BitSet UNRESERVED = new BitSet(256);
22+
23+
/**
24+
* Characters which are safe to use in a path, excluding /, i.e. {@link #UNRESERVED} plus
25+
* punctuation plus @
26+
*/
27+
private static final BitSet PATHSAFE = new BitSet(256);
28+
29+
/** Characters which are safe to use in a path, including /. */
30+
private static final BitSet PATH_SPECIAL = new BitSet(256);
31+
32+
static {
33+
// unreserved chars
34+
// alpha characters
35+
for (int i = 'a'; i <= 'z'; i++) {
36+
UNRESERVED.set(i);
37+
}
38+
for (int i = 'A'; i <= 'Z'; i++) {
39+
UNRESERVED.set(i);
40+
}
41+
// numeric characters
42+
for (int i = '0'; i <= '9'; i++) {
43+
UNRESERVED.set(i);
44+
}
45+
UNRESERVED.set('_'); // these are the characters of the "mark" list
46+
UNRESERVED.set('-');
47+
UNRESERVED.set('.');
48+
UNRESERVED.set('*');
49+
UNRESERVED.set('!');
50+
UNRESERVED.set('~');
51+
UNRESERVED.set('\'');
52+
UNRESERVED.set('(');
53+
UNRESERVED.set(')');
54+
55+
// URL path safe
56+
PATHSAFE.or(UNRESERVED);
57+
PATHSAFE.set(';'); // param separator
58+
PATHSAFE.set(':'); // RFC 2396
59+
PATHSAFE.set('@');
60+
PATHSAFE.set('&');
61+
PATHSAFE.set('=');
62+
PATHSAFE.set('+');
63+
PATHSAFE.set('$');
64+
PATHSAFE.set(',');
65+
66+
PATH_SPECIAL.or(PATHSAFE);
67+
PATH_SPECIAL.set('/');
68+
}
69+
70+
private static final int RADIX = 16;
71+
72+
private static String urlEncode(
73+
final String content,
74+
final Charset charset,
75+
final BitSet safechars,
76+
final boolean blankAsPlus) {
77+
if (content == null) {
78+
return null;
79+
}
80+
final StringBuilder buf = new StringBuilder();
81+
final ByteBuffer bb = charset.encode(content);
82+
while (bb.hasRemaining()) {
83+
final int b = bb.get() & 0xff;
84+
if (safechars.get(b)) {
85+
buf.append((char) b);
86+
} else if (blankAsPlus && b == ' ') {
87+
buf.append('+');
88+
} else {
89+
buf.append("%");
90+
final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
91+
final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
92+
buf.append(hex1);
93+
buf.append(hex2);
94+
}
95+
}
96+
return buf.toString();
97+
}
98+
99+
public static String encodeMultiSegmentPathParameter(String param) {
100+
return urlEncode(param, StandardCharsets.UTF_8, PATH_SPECIAL, false);
101+
}
102+
}

databricks-sdk-java/src/main/java/com/databricks/sdk/service/files/FilesImpl.java

Lines changed: 29 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package com.databricks.sdk.core.http;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
5+
import org.junit.jupiter.api.Test;
6+
7+
public class EncodingTest {
8+
@Test
9+
public void encodeMultiSegmentPathParameter() {
10+
assertEquals("/foo/bar", Encoding.encodeMultiSegmentPathParameter("/foo/bar"));
11+
assertEquals("a%3Fb%23c", Encoding.encodeMultiSegmentPathParameter("a?b#c"));
12+
}
13+
}

databricks-sdk-java/src/test/java/com/databricks/sdk/integration/FilesIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ void uploadAndDownloadFile(WorkspaceClient workspace) throws IOException {
2828
workspace,
2929
(volumePath) -> {
3030
// Generate a random file name and random contents of 10 KiB.
31-
String fileName = NameUtils.uniqueName(volumePath + "/test");
31+
String fileName = NameUtils.uniqueName(volumePath + "/test-with-?-and-#");
3232
byte[] fileContents = new byte[1024 * 10];
3333
for (int i = 0; i < fileContents.length; i++) {
3434
fileContents[i] = (byte) (i & 0xFF);

0 commit comments

Comments
 (0)