Skip to content

Commit d3a5908

Browse files
author
Justin Sweeney
authored
SOLR-17181: Using apache commons implementation for wildcard matching for glob patterns (#2301)
* Using apache commons implementation for wildcard matching for glob patterns
* Removing commons io dependency and instead reusing applicable code from that library
* Fixing comments with Javadoc links
* Actually fixing comments with Javadoc links
* Fixing javadoc comments
1 parent f63b937 commit d3a5908

File tree

1 file changed

+157
-6
lines changed

1 file changed

+157
-6
lines changed

solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java

Lines changed: 157 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,173 @@
1616
*/
1717
package org.apache.solr.common.util;
1818

19-
import java.nio.file.FileSystems;
20-
import java.nio.file.Paths;
19+
import java.util.ArrayDeque;
20+
import java.util.ArrayList;
21+
import java.util.Deque;
2122

2223
/** Provides methods for matching glob patterns against input strings. */
public class GlobPatternUtil {

  /**
   * Matches an input string against a provided glob pattern.
   *
   * <p>Only two wildcards are recognised: {@code ?} matches exactly one character and {@code *}
   * matches zero or more characters. Every other character is matched literally and
   * case-sensitively. The whole input must be consumed by the pattern for a match.
   *
   * <p>The algorithm is adapted from Apache Commons IO {@code FilenameUtils.wildcardMatch} so that
   * commons-io does not have to be brought in as a dependency. Unlike that original, a {@code ?}
   * that directly follows a {@code *} does not cancel the {@code *}: {@code "*?a"} matches {@code
   * "bba"} and {@code "*?"} matches any non-empty input.
   *
   * @see <a
   *     href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/FilenameUtils.html#wildcardMatch(java.lang.String,java.lang.String)">This
   *     uses code from Apache Commons IO</a>
   * @param pattern the glob pattern to match against, may be null
   * @param input the input string to match against a glob pattern, may be null
   * @return true if the input string matches the glob pattern (or both arguments are null), false
   *     otherwise
   */
  public static boolean matches(String pattern, String input) {
    if (input == null && pattern == null) {
      return true;
    }
    if (input == null || pattern == null) {
      return false;
    }
    final String[] wcs = splitOnTokens(pattern);
    // true while an unanchored '*' is pending, i.e. the next literal may start anywhere
    boolean anyChars = false;
    int textIdx = 0;
    int wcsIdx = 0;
    // each entry is {token index, alternative text index} for a literal that occurs again later
    final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);

    // loop around a backtrack stack, to handle complex * matching
    do {
      if (!backtrack.isEmpty()) {
        final int[] state = backtrack.pop();
        wcsIdx = state[0];
        textIdx = state[1];
        anyChars = true;
      }

      // loop whilst tokens and text left to process
      while (wcsIdx < wcs.length) {
        final String token = wcs[wcsIdx];

        if (token.equals("?")) {
          // ? consumes exactly one text char
          textIdx++;
          if (textIdx > input.length()) {
            break;
          }
          // anyChars is intentionally left as-is: "*?" is equivalent to "?*", so a pending '*'
          // must stay pending after the single char has been consumed

        } else if (token.equals("*")) {
          anyChars = true;
          if (wcsIdx == wcs.length - 1) {
            // trailing '*' swallows the rest of the input
            textIdx = input.length();
          }

        } else {
          // literal text token
          if (anyChars) {
            // a '*' is pending, so the literal may start anywhere from textIdx onwards
            textIdx = input.indexOf(token, textIdx);
            if (textIdx == -1) {
              // token not found
              break;
            }
            // remember a later occurrence so it can be tried if this one leads to a dead end
            final int repeat = input.indexOf(token, textIdx + 1);
            if (repeat >= 0) {
              backtrack.push(new int[] {wcsIdx, repeat});
            }
          } else if (!input.startsWith(token, textIdx)) {
            // literal must match exactly at the current position and does not
            break;
          }

          // matched text token, move text index to end of matched token
          textIdx += token.length();
          anyChars = false;
        }

        wcsIdx++;
      }

      if (wcsIdx == wcs.length) {
        final boolean consumedAll = textIdx == input.length();
        // pattern ended in '*' followed only by '?': the pending '*' absorbs what is left
        final boolean pendingStar = anyChars && textIdx < input.length();
        if (consumedAll || pendingStar) {
          return true;
        }
      }

    } while (!backtrack.isEmpty());

    return false;
  }

  /**
   * Splits a glob pattern into tokens. Each {@code ?} becomes its own {@code "?"} token, each run
   * of one or more consecutive {@code *} becomes a single {@code "*"} token, and each maximal run
   * of other characters becomes one literal token.
   *
   * @see <a
   *     href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/FilenameUtils.html">This
   *     uses code from Apache Commons IO</a>
   * @param text the pattern to split, not null
   * @return the array of tokens, never null; a pattern without wildcards yields a single-element
   *     array containing the pattern itself (which may be the empty string)
   */
  private static String[] splitOnTokens(final String text) {
    if (text.indexOf('?') == -1 && text.indexOf('*') == -1) {
      return new String[] {text};
    }

    final ArrayList<String> tokens = new ArrayList<>();
    final StringBuilder literal = new StringBuilder();
    char prevChar = 0;
    for (int i = 0; i < text.length(); i++) {
      final char ch = text.charAt(i);
      if (ch == '?' || ch == '*') {
        // a wildcard terminates the literal collected so far
        if (literal.length() != 0) {
          tokens.add(literal.toString());
          literal.setLength(0);
        }
        if (ch == '?') {
          tokens.add("?");
        } else if (prevChar != '*') {
          // collapse consecutive '*' into one token
          tokens.add("*");
        }
      } else {
        literal.append(ch);
      }
      prevChar = ch;
    }
    if (literal.length() != 0) {
      tokens.add(literal.toString());
    }

    return tokens.toArray(new String[0]);
  }
}

0 commit comments

Comments
 (0)