|
16 | 16 | */ |
17 | 17 | package org.apache.solr.common.util; |
18 | 18 |
|
19 | | -import java.nio.file.FileSystems; |
20 | | -import java.nio.file.Paths; |
| 19 | +import java.util.ArrayDeque; |
| 20 | +import java.util.ArrayList; |
| 21 | +import java.util.Deque; |
21 | 22 |
|
/**
 * Provides methods for matching glob patterns against input strings.
 *
 * <p>Only the wildcards {@code *} (zero or more characters) and {@code ?} (exactly one character)
 * are interpreted. Every other character of the pattern is matched literally and case-sensitively.
 */
public class GlobPatternUtil {

  /**
   * Matches an input string against a provided glob pattern. The algorithm is derived from Apache
   * Commons IO FilenameUtils; it is re-implemented here instead of bringing in commons-io as a
   * dependency.
   *
   * <p>Unlike the code it is derived from, a {@code ?} that directly follows a {@code *} is handled
   * correctly: {@code matches("*?", "ab")} and {@code matches("*?b", "aab")} both return {@code
   * true}.
   *
   * @see <a
   *     href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/FilenameUtils.html#wildcardMatch(java.lang.String,java.lang.String)">Derived
   *     from code in Apache Commons IO</a>
   * @param pattern the glob pattern to match against, may be null
   * @param input the input string to match against a glob pattern, may be null
   * @return true if the input string matches the glob pattern (or both arguments are null), false
   *     otherwise
   */
  public static boolean matches(String pattern, String input) {
    if (input == null && pattern == null) {
      return true;
    }
    if (input == null || pattern == null) {
      return false;
    }
    final String[] wcs = splitOnTokens(pattern);
    // true while a '*' is pending, i.e. the next text token may start anywhere at/after textIdx
    boolean anyChars = false;
    int textIdx = 0;
    int wcsIdx = 0;
    // each entry is {token index, alternative input position} for a text token that followed '*'
    final Deque<int[]> backtrack = new ArrayDeque<>(wcs.length);

    // loop around a backtrack stack, to handle complex * matching
    do {
      if (!backtrack.isEmpty()) {
        // resume at a text token that followed '*', trying its next occurrence in the input
        final int[] retry = backtrack.pop();
        wcsIdx = retry[0];
        textIdx = retry[1];
        anyChars = true;
      }

      // loop whilst tokens are left to process
      while (wcsIdx < wcs.length) {
        final String token = wcs[wcsIdx];

        if (token.equals("?")) {
          // ? consumes exactly one input character
          textIdx++;
          if (textIdx > input.length()) {
            break;
          }
          // safe to clear: splitOnTokens never emits '?' directly after '*'
          anyChars = false;

        } else if (token.equals("*")) {
          anyChars = true;
          if (wcsIdx == wcs.length - 1) {
            // trailing '*' swallows the rest of the input
            textIdx = input.length();
          }

        } else {
          // literal text token
          if (anyChars) {
            // a '*' precedes this token, so locate its first occurrence from textIdx onwards
            textIdx = input.indexOf(token, textIdx);
            if (textIdx == -1) {
              // token not found
              break;
            }
            // remember the next occurrence so it can be tried if this one leads to a dead end
            final int repeat = input.indexOf(token, textIdx + 1);
            if (repeat >= 0) {
              backtrack.push(new int[] {wcsIdx, repeat});
            }
          } else if (!input.startsWith(token, textIdx)) {
            // no pending '*': the token must match exactly at the current position
            break;
          }

          // matched text token, move text index to end of matched token
          textIdx += token.length();
          anyChars = false;
        }

        wcsIdx++;
      }

      // full match: every token consumed and every input character accounted for
      if (wcsIdx == wcs.length && textIdx == input.length()) {
        return true;
      }

    } while (!backtrack.isEmpty());

    return false;
  }

  /**
   * Splits a pattern into tokens. The text is split on '?' and '*'; each wildcard becomes its own
   * token and the literal text between wildcards becomes a text token.
   *
   * <p>Every uninterrupted run of wildcards is normalised: consecutive '*' collapse into a single
   * '*', and all '?' of the run are emitted before that '*'. For example {@code "*?b"} yields
   * {@code ["?", "*", "b"]}. A run with k '?' and at least one '*' matches "k or more characters"
   * regardless of the order of its wildcards, so this does not change the meaning of the pattern,
   * and it guarantees that {@link #matches(String, String)} never sees a '?' directly after a '*'.
   *
   * @see <a
   *     href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/FilenameUtils.html">Derived
   *     from code in Apache Commons IO</a>
   * @param text the pattern to split, not null
   * @return the array of tokens, never null; a pattern without wildcards (including the empty
   *     string) is returned as a single token
   */
  private static String[] splitOnTokens(final String text) {
    if (text.indexOf('?') == -1 && text.indexOf('*') == -1) {
      return new String[] {text};
    }

    final ArrayList<String> list = new ArrayList<>();
    final StringBuilder buffer = new StringBuilder();
    for (int i = 0; i < text.length(); i++) {
      final char ch = text.charAt(i);
      if (ch != '?' && ch != '*') {
        buffer.append(ch);
        continue;
      }

      // a wildcard terminates any literal text collected so far
      if (buffer.length() != 0) {
        list.add(buffer.toString());
        buffer.setLength(0);
      }

      // text tokens never contain '*', so a trailing "*" token means we are inside a wildcard run
      final int last = list.size() - 1;
      final boolean afterStar = last >= 0 && list.get(last).equals("*");
      if (ch == '?') {
        if (afterStar) {
          // keep the '*' at the end of the run: slot the '?' in front of it
          list.add(last, "?");
        } else {
          list.add("?");
        }
      } else if (!afterStar) {
        // ch == '*' here; a '*' already pending in this run makes another one redundant
        list.add("*");
      }
    }
    if (buffer.length() != 0) {
      list.add(buffer.toString());
    }

    return list.toArray(new String[0]);
  }
}
0 commit comments