Skip to content

Commit ff2af66

Browse files
committed
PathPatternParser encodes patterns as it parses them
Before this commit there was no special handling for URL encoding of the path pattern string coming into the path pattern parser. No assumptions were made about it being in an encoded form or not. With this change it is assumed incoming path patterns are not encoded and as part of parsing the parser builds PathPattern objects that include encoded elements. For example parsing "/f o" will create a path pattern of the form "/f%20o". In this form it can then be used to match against encoded paths. Handling encoded characters is not trivial and has resulted in some loss in matching speed but care has been taken to avoid unnecessary creation of additional heap objects. When matching variables the variable values are return in a decoded form. It is hoped the speed can be recovered, at least for the common case of non-encoded incoming paths. Issue: SPR-15640
1 parent c0550f7 commit ff2af66

File tree

9 files changed

+332
-44
lines changed

9 files changed

+332
-44
lines changed

spring-web/src/main/java/org/springframework/web/util/pattern/CaptureTheRestPathElement.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ public boolean matches(int candidateIndex, MatchingContext matchingContext) {
5656
matchingContext.remainingPathIndex = matchingContext.candidateLength;
5757
}
5858
if (matchingContext.extractingVariables) {
59-
matchingContext.set(variableName, new String(matchingContext.candidate, candidateIndex,
60-
matchingContext.candidateLength - candidateIndex));
59+
matchingContext.set(variableName, decode(new String(matchingContext.candidate, candidateIndex,
60+
matchingContext.candidateLength - candidateIndex)));
6161
}
6262
return true;
6363
}

spring-web/src/main/java/org/springframework/web/util/pattern/CaptureVariablePathElement.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,13 @@
1616

1717
package org.springframework.web.util.pattern;
1818

19+
import java.io.UnsupportedEncodingException;
20+
import java.nio.charset.StandardCharsets;
1921
import java.util.regex.Matcher;
2022
import java.util.regex.Pattern;
2123

24+
import org.springframework.web.util.UriUtils;
25+
2226
/**
2327
* A path element representing capturing a piece of the path as a variable. In the pattern
2428
* '/foo/{bar}/goo' the {bar} is represented as a {@link CaptureVariablePathElement}. There
@@ -74,10 +78,22 @@ public boolean matches(int candidateIndex, PathPattern.MatchingContext matchingC
7478
return false;
7579
}
7680

81+
String substringForDecoding = null;
7782
CharSequence candidateCapture = null;
7883
if (this.constraintPattern != null) {
7984
// TODO possible optimization - only regex match if rest of pattern matches? Benefit likely to vary pattern to pattern
80-
candidateCapture = new SubSequence(matchingContext.candidate, candidateIndex, nextPos);
85+
if (includesPercent(matchingContext.candidate, candidateIndex, nextPos)) {
86+
substringForDecoding = new String(matchingContext.candidate, candidateIndex, nextPos);
87+
try {
88+
candidateCapture = UriUtils.decode(substringForDecoding,StandardCharsets.UTF_8.name());
89+
}
90+
catch (UnsupportedEncodingException e) {
91+
throw new IllegalStateException(e);
92+
}
93+
}
94+
else {
95+
candidateCapture = new SubSequence(matchingContext.candidate, candidateIndex, nextPos);
96+
}
8197
Matcher matcher = constraintPattern.matcher(candidateCapture);
8298
if (matcher.groupCount() != 0) {
8399
throw new IllegalArgumentException(
@@ -115,7 +131,8 @@ public boolean matches(int candidateIndex, PathPattern.MatchingContext matchingC
115131

116132
if (match && matchingContext.extractingVariables) {
117133
matchingContext.set(this.variableName,
118-
new String(matchingContext.candidate, candidateIndex, nextPos - candidateIndex));
134+
candidateCapture != null ? candidateCapture.toString():
135+
decode(new String(matchingContext.candidate, candidateIndex, nextPos - candidateIndex)));
119136
}
120137
return match;
121138
}

spring-web/src/main/java/org/springframework/web/util/pattern/InternalPathPatternParser.java

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,13 @@
1616

1717
package org.springframework.web.util.pattern;
1818

19+
import java.io.UnsupportedEncodingException;
20+
import java.nio.charset.StandardCharsets;
1921
import java.util.ArrayList;
2022
import java.util.List;
2123
import java.util.regex.PatternSyntaxException;
2224

25+
import org.springframework.web.util.UriUtils;
2326
import org.springframework.web.util.pattern.PatternParseException.PatternMessage;
2427

2528
/**
@@ -162,7 +165,7 @@ else if (ch == '}') {
162165
this.variableCaptureCount++;
163166
}
164167
else if (ch == ':') {
165-
if (this.insideVariableCapture) {
168+
if (this.insideVariableCapture && !this.isCaptureTheRestVariable) {
166169
skipCaptureRegex();
167170
this.insideVariableCapture = false;
168171
this.variableCaptureCount++;
@@ -304,6 +307,26 @@ else if (this.currentPE instanceof SeparatorPathElement) {
304307

305308
resetPathElementState();
306309
}
310+
311+
private char[] getPathElementText(boolean encodeElement) {
312+
char[] pathElementText = new char[this.pos - this.pathElementStart];
313+
if (encodeElement) {
314+
try {
315+
String unencoded = new String(this.pathPatternData, this.pathElementStart, this.pos - this.pathElementStart);
316+
String encoded = UriUtils.encodeFragment(unencoded, StandardCharsets.UTF_8.name());
317+
pathElementText = encoded.toCharArray();
318+
}
319+
catch (UnsupportedEncodingException ex) {
320+
// Should never happen...
321+
throw new IllegalStateException(ex);
322+
}
323+
}
324+
else {
325+
System.arraycopy(this.pathPatternData, this.pathElementStart, pathElementText, 0,
326+
this.pos - this.pathElementStart);
327+
}
328+
return pathElementText;
329+
}
307330

308331
/**
309332
* Used the knowledge built up whilst processing since the last path element to determine what kind of path
@@ -314,23 +337,20 @@ private PathElement createPathElement() {
314337
if (this.insideVariableCapture) {
315338
throw new PatternParseException(this.pos, this.pathPatternData, PatternMessage.MISSING_CLOSE_CAPTURE);
316339
}
317-
318-
char[] pathElementText = new char[this.pos - this.pathElementStart];
319-
System.arraycopy(this.pathPatternData, this.pathElementStart, pathElementText, 0,
320-
this.pos - this.pathElementStart);
340+
321341
PathElement newPE = null;
322342

323343
if (this.variableCaptureCount > 0) {
324344
if (this.variableCaptureCount == 1 && this.pathElementStart == this.variableCaptureStart &&
325345
this.pathPatternData[this.pos - 1] == '}') {
326346
if (this.isCaptureTheRestVariable) {
327347
// It is {*....}
328-
newPE = new CaptureTheRestPathElement(pathElementStart, pathElementText, separator);
348+
newPE = new CaptureTheRestPathElement(pathElementStart, getPathElementText(false), separator);
329349
}
330350
else {
331351
// It is a full capture of this element (possibly with constraint), for example: /foo/{abc}/
332352
try {
333-
newPE = new CaptureVariablePathElement(this.pathElementStart, pathElementText,
353+
newPE = new CaptureVariablePathElement(this.pathElementStart, getPathElementText(false),
334354
this.caseSensitive, this.separator);
335355
}
336356
catch (PatternSyntaxException pse) {
@@ -347,8 +367,9 @@ private PathElement createPathElement() {
347367
throw new PatternParseException(this.pathElementStart, this.pathPatternData,
348368
PatternMessage.CAPTURE_ALL_IS_STANDALONE_CONSTRUCT);
349369
}
350-
RegexPathElement newRegexSection = new RegexPathElement(this.pathElementStart, pathElementText,
351-
this.caseSensitive, this.pathPatternData, this.separator);
370+
RegexPathElement newRegexSection = new RegexPathElement(this.pathElementStart,
371+
getPathElementText(false), this.caseSensitive,
372+
this.pathPatternData, this.separator);
352373
for (String variableName : newRegexSection.getVariableNames()) {
353374
recordCapturedVariable(this.pathElementStart, variableName);
354375
}
@@ -361,16 +382,16 @@ private PathElement createPathElement() {
361382
newPE = new WildcardPathElement(this.pathElementStart, this.separator);
362383
}
363384
else {
364-
newPE = new RegexPathElement(this.pathElementStart, pathElementText,
385+
newPE = new RegexPathElement(this.pathElementStart, getPathElementText(false),
365386
this.caseSensitive, this.pathPatternData, this.separator);
366387
}
367388
}
368389
else if (this.singleCharWildcardCount != 0) {
369-
newPE = new SingleCharWildcardedPathElement(this.pathElementStart, pathElementText,
390+
newPE = new SingleCharWildcardedPathElement(this.pathElementStart, getPathElementText(true),
370391
this.singleCharWildcardCount, this.caseSensitive, this.separator);
371392
}
372393
else {
373-
newPE = new LiteralPathElement(this.pathElementStart, pathElementText,
394+
newPE = new LiteralPathElement(this.pathElementStart, getPathElementText(true),
374395
this.caseSensitive, this.separator);
375396
}
376397
}

spring-web/src/main/java/org/springframework/web/util/pattern/LiteralPathElement.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,11 @@ public boolean matches(int candidateIndex, MatchingContext matchingContext) {
5757
if (this.caseSensitive) {
5858
for (int i = 0; i < len; i++) {
5959
if (matchingContext.candidate[candidateIndex++] != this.text[i]) {
60-
return false;
60+
// TODO unfortunate performance hit here on comparison when encoded data is the less likely case
61+
if (i < 3 || matchingContext.candidate[candidateIndex-3] != '%' ||
62+
Character.toUpperCase(matchingContext.candidate[candidateIndex-1]) != this.text[i]) {
63+
return false;
64+
}
6165
}
6266
}
6367
}

spring-web/src/main/java/org/springframework/web/util/pattern/PathElement.java

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616

1717
package org.springframework.web.util.pattern;
1818

19+
import java.io.UnsupportedEncodingException;
20+
import java.nio.charset.StandardCharsets;
21+
22+
import org.springframework.web.util.UriUtils;
1923
import org.springframework.web.util.pattern.PathPattern.MatchingContext;
2024

2125
/**
@@ -65,7 +69,7 @@ abstract class PathElement {
6569
public abstract boolean matches(int candidatePos, MatchingContext matchingContext);
6670

6771
/**
68-
* Return the length of the path element where captures are considered to be one character long.
72+
* @return the length of the path element where captures are considered to be one character long.
6973
*/
7074
public abstract int getNormalizedLength();
7175

@@ -98,4 +102,50 @@ protected boolean nextIfExistsIsSeparator(int nextIndex, MatchingContext matchin
98102
matchingContext.candidate[nextIndex] == this.separator);
99103
}
100104

105+
/**
106+
* Decode an input CharSequence if necessary.
107+
* @param toDecode the input char sequence that should be decoded if necessary
108+
* @returns the decoded result
109+
*/
110+
protected String decode(CharSequence toDecode) {
111+
CharSequence decoded = toDecode;
112+
if (includesPercent(toDecode)) {
113+
try {
114+
decoded = UriUtils.decode(toDecode.toString(), StandardCharsets.UTF_8.name());
115+
}
116+
catch (UnsupportedEncodingException e) {
117+
throw new IllegalStateException(e);
118+
}
119+
}
120+
return decoded.toString();
121+
}
122+
123+
/**
124+
* @param char sequence of characters
125+
* @param from start position (included in check)
126+
* @param to end position (excluded from check)
127+
* @return true if the chars array includes a '%' character between the specified positions
128+
*/
129+
protected boolean includesPercent(char[] chars, int from, int to) {
130+
for (int i = from; i < to; i++) {
131+
if (chars[i] == '%') {
132+
return true;
133+
}
134+
}
135+
return false;
136+
}
137+
138+
/**
139+
* @param chars string that may include a '%' character indicating it is encoded
140+
* @return true if the string contains a '%' character
141+
*/
142+
protected boolean includesPercent(CharSequence chars) {
143+
for (int i = 0, max = chars.length(); i < max; i++) {
144+
if (chars.charAt(i) == '%') {
145+
return true;
146+
}
147+
}
148+
return false;
149+
}
150+
101151
}

spring-web/src/main/java/org/springframework/web/util/pattern/RegexPathElement.java

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,15 @@
1616

1717
package org.springframework.web.util.pattern;
1818

19+
import java.io.UnsupportedEncodingException;
20+
import java.nio.charset.StandardCharsets;
1921
import java.util.LinkedList;
2022
import java.util.List;
2123
import java.util.regex.Matcher;
2224
import java.util.regex.Pattern;
2325

2426
import org.springframework.util.AntPathMatcher;
27+
import org.springframework.web.util.UriUtils;
2528
import org.springframework.web.util.pattern.PathPattern.MatchingContext;
2629

2730
/**
@@ -39,7 +42,7 @@ class RegexPathElement extends PathElement {
3942
private final String DEFAULT_VARIABLE_PATTERN = "(.*)";
4043

4144

42-
private final char[] regex;
45+
private char[] regex;
4346

4447
private final boolean caseSensitive;
4548

@@ -61,17 +64,20 @@ class RegexPathElement extends PathElement {
6164
public Pattern buildPattern(char[] regex, char[] completePattern) {
6265
StringBuilder patternBuilder = new StringBuilder();
6366
String text = new String(regex);
67+
StringBuilder encodedRegexBuilder = new StringBuilder();
6468
Matcher matcher = GLOB_PATTERN.matcher(text);
6569
int end = 0;
6670

6771
while (matcher.find()) {
68-
patternBuilder.append(quote(text, end, matcher.start()));
72+
patternBuilder.append(quote(text, end, matcher.start(), encodedRegexBuilder));
6973
String match = matcher.group();
7074
if ("?".equals(match)) {
7175
patternBuilder.append('.');
76+
encodedRegexBuilder.append('?');
7277
}
7378
else if ("*".equals(match)) {
7479
patternBuilder.append(".*");
80+
encodedRegexBuilder.append('*');
7581
int pos = matcher.start();
7682
if (pos < 1 || text.charAt(pos-1) != '.') {
7783
// To be compatible with the AntPathMatcher comparator,
@@ -80,6 +86,7 @@ else if ("*".equals(match)) {
8086
}
8187
}
8288
else if (match.startsWith("{") && match.endsWith("}")) {
89+
encodedRegexBuilder.append(match);
8390
int colonIdx = match.indexOf(':');
8491
if (colonIdx == -1) {
8592
patternBuilder.append(DEFAULT_VARIABLE_PATTERN);
@@ -106,7 +113,8 @@ else if (match.startsWith("{") && match.endsWith("}")) {
106113
end = matcher.end();
107114
}
108115

109-
patternBuilder.append(quote(text, end, text.length()));
116+
patternBuilder.append(quote(text, end, text.length(), encodedRegexBuilder));
117+
this.regex = encodedRegexBuilder.toString().toCharArray();
110118
if (this.caseSensitive) {
111119
return Pattern.compile(patternBuilder.toString());
112120
}
@@ -119,17 +127,33 @@ public List<String> getVariableNames() {
119127
return this.variableNames;
120128
}
121129

122-
private String quote(String s, int start, int end) {
130+
private String quote(String s, int start, int end, StringBuilder encodedRegexBuilder) {
123131
if (start == end) {
124132
return "";
125133
}
126-
return Pattern.quote(s.substring(start, end));
134+
String substring = s.substring(start, end);
135+
try {
136+
String encodedSubString = UriUtils.encodePath(substring, StandardCharsets.UTF_8.name());
137+
encodedRegexBuilder.append(encodedSubString);
138+
}
139+
catch (UnsupportedEncodingException e) {
140+
throw new IllegalStateException(e);
141+
}
142+
return Pattern.quote(substring);
127143
}
128144

129145
@Override
130146
public boolean matches(int candidateIndex, MatchingContext matchingContext) {
131147
int pos = matchingContext.scanAhead(candidateIndex);
132-
Matcher matcher = this.pattern.matcher(new SubSequence(matchingContext.candidate, candidateIndex, pos));
148+
149+
CharSequence textToMatch = null;
150+
if (includesPercent(matchingContext.candidate, candidateIndex, pos)) {
151+
textToMatch = decode(new SubSequence(matchingContext.candidate, candidateIndex, pos));
152+
}
153+
else {
154+
textToMatch = new SubSequence(matchingContext.candidate, candidateIndex, pos);
155+
}
156+
Matcher matcher = this.pattern.matcher(textToMatch);
133157
boolean matches = matcher.matches();
134158

135159
if (matches) {

spring-web/src/main/java/org/springframework/web/util/pattern/SingleCharWildcardedPathElement.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,32 @@ public boolean matches(int candidateIndex, MatchingContext matchingContext) {
6565
if (this.caseSensitive) {
6666
for (int i = 0; i <this.len; i++) {
6767
char t = this.text[i];
68-
if (t != '?' && candidate[candidateIndex] != t) {
69-
return false;
68+
if (t == '?') {
69+
if (candidate[candidateIndex] == '%') {
70+
// encoded value, skip next two as well!
71+
candidateIndex += 2;
72+
}
73+
}
74+
else if (candidate[candidateIndex] != t) {
75+
// TODO unfortunate performance hit here on comparison when encoded data is the less likely case
76+
if (i < 3 || matchingContext.candidate[candidateIndex-2] != '%' ||
77+
Character.toUpperCase(matchingContext.candidate[candidateIndex]) != this.text[i]) {
78+
return false;
79+
}
7080
}
7181
candidateIndex++;
7282
}
7383
}
7484
else {
7585
for (int i = 0; i < this.len; i++) {
7686
char t = this.text[i];
77-
if (t != '?' && Character.toLowerCase(candidate[candidateIndex]) != t) {
87+
if (t == '?') {
88+
if (candidate[candidateIndex] == '%') {
89+
// encoded value, skip next two as well!
90+
candidateIndex += 2;
91+
}
92+
}
93+
else if (Character.toLowerCase(candidate[candidateIndex]) != t) {
7894
return false;
7995
}
8096
candidateIndex++;
@@ -117,7 +133,7 @@ public int getNormalizedLength() {
117133

118134

119135
public String toString() {
120-
return "SingleCharWildcarding(" + String.valueOf(this.text) + ")";
136+
return "SingleCharWildcarded(" + String.valueOf(this.text) + ")";
121137
}
122138

123139
}

0 commit comments

Comments
 (0)