Skip to content

Commit 27c3f7e

Browse files
kilink authored and cesarhernandezgt committed
Refine StringUtils#uriDecode and update documentation
Refine the StringUtils#uriDecode method in the following ways:
- Use a StringBuilder instead of ByteArrayOutputStream, and only decode %-encoded sequences.
- Use HexFormat.fromHexDigits to decode hex sequences.
- Decode to a byte array that is only allocated if encoded sequences are encountered.

This commit adds another optimization mainly for the use case where there is no encoded sequence, and updates the Javadoc of both StringUtils#uriDecode and UriUtils#decode to match the implementation.

Signed-off-by: Patrick Strawderman <[email protected]>
Co-Authored-by: Sebastien Deleuze <[email protected]>
Closes spring-projectsgh-35253

(cherry picked from commit 24e66b6)
1 parent 83eea72 commit 27c3f7e

File tree

3 files changed

+52
-36
lines changed

3 files changed

+52
-36
lines changed

spring-core/src/main/java/org/springframework/util/StringUtils.java

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
package org.springframework.util;
1818

19-
import java.io.ByteArrayOutputStream;
2019
import java.nio.charset.Charset;
2120
import java.util.ArrayDeque;
2221
import java.util.ArrayList;
@@ -25,6 +24,7 @@
2524
import java.util.Collections;
2625
import java.util.Deque;
2726
import java.util.Enumeration;
27+
import java.util.HexFormat;
2828
import java.util.Iterator;
2929
import java.util.LinkedHashSet;
3030
import java.util.List;
@@ -796,54 +796,60 @@ public static boolean pathEquals(String path1, String path2) {
796796
}
797797

798798
/**
799-
* Decode the given encoded URI component value. Based on the following rules:
800-
* <ul>
801-
* <li>Alphanumeric characters {@code "a"} through {@code "z"}, {@code "A"} through {@code "Z"},
802-
* and {@code "0"} through {@code "9"} stay the same.</li>
803-
* <li>Special characters {@code "-"}, {@code "_"}, {@code "."}, and {@code "*"} stay the same.</li>
804-
* <li>A sequence "{@code %<i>xy</i>}" is interpreted as a hexadecimal representation of the character.</li>
805-
* <li>For all other characters (including those already decoded), the output is undefined.</li>
806-
* </ul>
807-
* @param source the encoded String
808-
* @param charset the character set
799+
* Decode the given encoded URI component value by replacing "<i>{@code %xy}</i>" sequences,
800+
* each the hexadecimal representation of a byte, with the characters those bytes encode in
801+
* the specified charset, leaving other characters unchanged.
802+
* @param source the encoded {@code String}
803+
* @param charset the character encoding to use to decode the "<i>{@code %xy}</i>" sequences
809804
* @return the decoded value
810805
* @throws IllegalArgumentException when the given source contains invalid encoded sequences
811806
* @since 5.0
812-
* @see java.net.URLDecoder#decode(String, String)
807+
* @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding
813808
*/
814809
public static String uriDecode(String source, Charset charset) {
815810
int length = source.length();
816-
if (length == 0) {
811+
int firstPercentIndex = source.indexOf('%');
812+
if (length == 0 || firstPercentIndex < 0) {
817813
return source;
818814
}
819-
Assert.notNull(charset, "Charset must not be null");
820815

821-
ByteArrayOutputStream baos = new ByteArrayOutputStream(length);
822-
boolean changed = false;
823-
for (int i = 0; i < length; i++) {
824-
int ch = source.charAt(i);
816+
StringBuilder output = new StringBuilder(length);
817+
output.append(source, 0, firstPercentIndex);
818+
byte[] bytes = null;
819+
int i = firstPercentIndex;
820+
while (i < length) {
821+
char ch = source.charAt(i);
825822
if (ch == '%') {
826-
if (i + 2 < length) {
827-
char hex1 = source.charAt(i + 1);
828-
char hex2 = source.charAt(i + 2);
829-
int u = Character.digit(hex1, 16);
830-
int l = Character.digit(hex2, 16);
831-
if (u == -1 || l == -1) {
832-
throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
823+
try {
824+
if (bytes == null) {
825+
bytes = new byte[(length - i) / 3];
833826
}
834-
baos.write((char) ((u << 4) + l));
835-
i += 2;
836-
changed = true;
827+
828+
int pos = 0;
829+
while (i + 2 < length && ch == '%') {
830+
bytes[pos++] = (byte) HexFormat.fromHexDigits(source, i + 1, i + 3);
831+
i += 3;
832+
if (i < length) {
833+
ch = source.charAt(i);
834+
}
835+
}
836+
837+
if (i < length && ch == '%') {
838+
throw new IllegalArgumentException("Incomplete trailing escape (%) pattern");
839+
}
840+
841+
output.append(new String(bytes, 0, pos, charset));
837842
}
838-
else {
843+
catch (NumberFormatException ex) {
839844
throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
840845
}
841846
}
842847
else {
843-
baos.write(ch);
848+
output.append(ch);
849+
i++;
844850
}
845851
}
846-
return (changed ? StreamUtils.copyToString(baos, charset) : source);
852+
return output.toString();
847853
}
848854

849855
/**

spring-web/src/main/java/org/springframework/web/util/UriUtils.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -373,15 +373,16 @@ public static String decode(String source, String encoding) {
373373
}
374374

375375
/**
376-
* Decode the given encoded URI component.
377-
* <p>See {@link StringUtils#uriDecode(String, Charset)} for the decoding rules.
378-
* @param source the encoded String
379-
* @param charset the character encoding to use
376+
* Decode the given encoded URI component value by replacing "<i>{@code %xy}</i>" sequences,
377+
* each the hexadecimal representation of a byte, with the characters those bytes encode in
378+
* the specified charset, leaving other characters unchanged.
379+
* @param source the encoded {@code String}
380+
* @param charset the character encoding to use to decode the "<i>{@code %xy}</i>" sequences
380381
* @return the decoded value
381382
* @throws IllegalArgumentException when the given source contains invalid encoded sequences
382383
* @since 5.0
383384
* @see StringUtils#uriDecode(String, Charset)
384-
* @see java.net.URLDecoder#decode(String, String)
385+
* @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding
385386
*/
386387
public static String decode(String source, Charset charset) {
387388
return StringUtils.uriDecode(source, charset);

spring-web/src/test/java/org/springframework/web/util/UriUtilsTests.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,12 +107,21 @@ void decode() {
107107
assertThat(UriUtils.decode("T%C5%8Dky%C5%8D", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
108108
assertThat(UriUtils.decode("/Z%C3%BCrich", CHARSET)).as("Invalid encoded result").isEqualTo("/Z\u00fcrich");
109109
assertThat(UriUtils.decode("T\u014dky\u014d", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
110+
assertThat(UriUtils.decode("%20\u2019", CHARSET)).as("Invalid encoded result").isEqualTo(" \u2019");
111+
assertThat(UriUtils.decode("\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo("śpřìńġ");
112+
assertThat(UriUtils.decode("%20\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo(" śpřìńġ");
110113
}
111114

112115
@Test
113116
void decodeInvalidSequence() {
114117
assertThatIllegalArgumentException().isThrownBy(() ->
115118
UriUtils.decode("foo%2", CHARSET));
119+
assertThatIllegalArgumentException().isThrownBy(() ->
120+
UriUtils.decode("foo%", CHARSET));
121+
assertThatIllegalArgumentException().isThrownBy(() ->
122+
UriUtils.decode("%", CHARSET));
123+
assertThatIllegalArgumentException().isThrownBy(() ->
124+
UriUtils.decode("%zz", CHARSET));
116125
}
117126

118127
@Test

0 commit comments

Comments
 (0)