Skip to content

Commit 24e66b6

Browse files
kilink authored and bclozel committed
Refine StringUtils#uriDecode and update documentation
Refine the StringUtils#uriDecode method in the following ways:
- Use a StringBuilder instead of ByteArrayOutputStream, and only decode %-encoded sequences.
- Use HexFormat.fromHexDigits to decode hex sequences.
- Decode to a byte array that is only allocated if encoded sequences are encountered.

This commit adds another optimization mainly for the use case where there is no encoded sequence, and updates the Javadoc of both StringUtils#uriDecode and UriUtils#decode to match the implementation.

Signed-off-by: Patrick Strawderman <[email protected]>
Co-Authored-by: Sebastien Deleuze <[email protected]>
Closes gh-35253
1 parent f3832c7 commit 24e66b6

File tree

3 files changed

+52
-36
lines changed

3 files changed

+52
-36
lines changed

spring-core/src/main/java/org/springframework/util/StringUtils.java

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
package org.springframework.util;
1818

19-
import java.io.ByteArrayOutputStream;
2019
import java.nio.charset.Charset;
2120
import java.util.ArrayDeque;
2221
import java.util.ArrayList;
@@ -25,6 +24,7 @@
2524
import java.util.Collections;
2625
import java.util.Deque;
2726
import java.util.Enumeration;
27+
import java.util.HexFormat;
2828
import java.util.Iterator;
2929
import java.util.LinkedHashSet;
3030
import java.util.List;
@@ -803,54 +803,60 @@ public static boolean pathEquals(String path1, String path2) {
803803
}
804804

805805
/**
806-
* Decode the given encoded URI component value. Based on the following rules:
807-
* <ul>
808-
* <li>Alphanumeric characters {@code "a"} through {@code "z"}, {@code "A"} through {@code "Z"},
809-
* and {@code "0"} through {@code "9"} stay the same.</li>
810-
* <li>Special characters {@code "-"}, {@code "_"}, {@code "."}, and {@code "*"} stay the same.</li>
811-
* <li>A sequence "<i>{@code %xy}</i>" is interpreted as a hexadecimal representation of the character.</li>
812-
* <li>For all other characters (including those already decoded), the output is undefined.</li>
813-
* </ul>
814-
* @param source the encoded String
815-
* @param charset the character set
806+
* Decode the given encoded URI component value by replacing "<i>{@code %xy}</i>" sequences
807+
* by a hexadecimal representation of the character in the specified charset, leaving other
808+
* characters unchanged.
809+
* @param source the encoded {@code String}
810+
* @param charset the character encoding to use to decode the "<i>{@code %xy}</i>" sequences
816811
* @return the decoded value
817812
* @throws IllegalArgumentException when the given source contains invalid encoded sequences
818813
* @since 5.0
819-
* @see java.net.URLDecoder#decode(String, String)
814+
* @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding
820815
*/
821816
public static String uriDecode(String source, Charset charset) {
822817
int length = source.length();
823-
if (length == 0) {
818+
int firstPercentIndex = source.indexOf('%');
819+
if (length == 0 || firstPercentIndex < 0) {
824820
return source;
825821
}
826-
Assert.notNull(charset, "Charset must not be null");
827822

828-
ByteArrayOutputStream baos = new ByteArrayOutputStream(length);
829-
boolean changed = false;
830-
for (int i = 0; i < length; i++) {
831-
int ch = source.charAt(i);
823+
StringBuilder output = new StringBuilder(length);
824+
output.append(source, 0, firstPercentIndex);
825+
byte[] bytes = null;
826+
int i = firstPercentIndex;
827+
while (i < length) {
828+
char ch = source.charAt(i);
832829
if (ch == '%') {
833-
if (i + 2 < length) {
834-
char hex1 = source.charAt(i + 1);
835-
char hex2 = source.charAt(i + 2);
836-
int u = Character.digit(hex1, 16);
837-
int l = Character.digit(hex2, 16);
838-
if (u == -1 || l == -1) {
839-
throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
830+
try {
831+
if (bytes == null) {
832+
bytes = new byte[(length - i) / 3];
840833
}
841-
baos.write((char) ((u << 4) + l));
842-
i += 2;
843-
changed = true;
834+
835+
int pos = 0;
836+
while (i + 2 < length && ch == '%') {
837+
bytes[pos++] = (byte) HexFormat.fromHexDigits(source, i + 1, i + 3);
838+
i += 3;
839+
if (i < length) {
840+
ch = source.charAt(i);
841+
}
842+
}
843+
844+
if (i < length && ch == '%') {
845+
throw new IllegalArgumentException("Incomplete trailing escape (%) pattern");
846+
}
847+
848+
output.append(new String(bytes, 0, pos, charset));
844849
}
845-
else {
850+
catch (NumberFormatException ex) {
846851
throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
847852
}
848853
}
849854
else {
850-
baos.write(ch);
855+
output.append(ch);
856+
i++;
851857
}
852858
}
853-
return (changed ? StreamUtils.copyToString(baos, charset) : source);
859+
return output.toString();
854860
}
855861

856862
/**

spring-web/src/main/java/org/springframework/web/util/UriUtils.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -373,15 +373,16 @@ public static String decode(String source, String encoding) {
373373
}
374374

375375
/**
376-
* Decode the given encoded URI component.
377-
* <p>See {@link StringUtils#uriDecode(String, Charset)} for the decoding rules.
378-
* @param source the encoded String
379-
* @param charset the character encoding to use
376+
* Decode the given encoded URI component value by replacing "<i>{@code %xy}</i>" sequences
377+
* by a hexadecimal representation of the character in the specified charset, leaving other
378+
* characters unchanged.
379+
* @param source the encoded {@code String}
380+
* @param charset the character encoding to use to decode the "<i>{@code %xy}</i>" sequences
380381
* @return the decoded value
381382
* @throws IllegalArgumentException when the given source contains invalid encoded sequences
382383
* @since 5.0
383384
* @see StringUtils#uriDecode(String, Charset)
384-
* @see java.net.URLDecoder#decode(String, String)
385+
* @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding
385386
*/
386387
public static String decode(String source, Charset charset) {
387388
return StringUtils.uriDecode(source, charset);

spring-web/src/test/java/org/springframework/web/util/UriUtilsTests.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,12 +107,21 @@ void decode() {
107107
assertThat(UriUtils.decode("T%C5%8Dky%C5%8D", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
108108
assertThat(UriUtils.decode("/Z%C3%BCrich", CHARSET)).as("Invalid encoded result").isEqualTo("/Z\u00fcrich");
109109
assertThat(UriUtils.decode("T\u014dky\u014d", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
110+
assertThat(UriUtils.decode("%20\u2019", CHARSET)).as("Invalid encoded result").isEqualTo(" \u2019");
111+
assertThat(UriUtils.decode("\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo("śpřìńġ");
112+
assertThat(UriUtils.decode("%20\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo(" śpřìńġ");
110113
}
111114

112115
@Test
113116
void decodeInvalidSequence() {
114117
assertThatIllegalArgumentException().isThrownBy(() ->
115118
UriUtils.decode("foo%2", CHARSET));
119+
assertThatIllegalArgumentException().isThrownBy(() ->
120+
UriUtils.decode("foo%", CHARSET));
121+
assertThatIllegalArgumentException().isThrownBy(() ->
122+
UriUtils.decode("%", CHARSET));
123+
assertThatIllegalArgumentException().isThrownBy(() ->
124+
UriUtils.decode("%zz", CHARSET));
116125
}
117126

118127
@Test

0 commit comments

Comments
 (0)