Skip to content

Commit d2387cf

Browse files
authored
Make Glob non-recursive (#132798)
This changes the implementation of `Glob` (used by `FilterPath`) to use a non-recursive algorithm for improved efficiency and stability.
1 parent e8957e4 commit d2387cf

File tree

2 files changed

+268
-19
lines changed

2 files changed

+268
-19
lines changed

libs/core/src/main/java/org/elasticsearch/core/Glob.java

Lines changed: 69 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,34 +29,84 @@ public static boolean globMatch(String pattern, String str) {
2929
if (pattern == null || str == null) {
3030
return false;
3131
}
32-
int firstIndex = pattern.indexOf('*');
33-
if (firstIndex == -1) {
32+
33+
int patternIndex = pattern.indexOf('*');
34+
if (patternIndex == -1) {
35+
// Nothing to glob
3436
return pattern.equals(str);
3537
}
36-
if (firstIndex == 0) {
38+
39+
if (patternIndex == 0) {
40+
// If the pattern is a literal '*' then it matches any input
3741
if (pattern.length() == 1) {
3842
return true;
3943
}
40-
int nextIndex = pattern.indexOf('*', firstIndex + 1);
41-
if (nextIndex == -1) {
42-
return str.endsWith(pattern.substring(1));
43-
} else if (nextIndex == 1) {
44-
// Double wildcard "**" - skipping the first "*"
45-
return globMatch(pattern.substring(1), str);
44+
} else {
45+
if (str.regionMatches(0, pattern, 0, patternIndex) == false) {
46+
// If the pattern starts with a literal (i.e. not '*') then the input string must also start with that literal
47+
return false;
4648
}
47-
String part = pattern.substring(1, nextIndex);
48-
int partIndex = str.indexOf(part);
49-
while (partIndex != -1) {
50-
if (globMatch(pattern.substring(nextIndex), str.substring(partIndex + part.length()))) {
51-
return true;
49+
if (patternIndex == pattern.length() - 1) {
50+
// The pattern is "something*", so if the starting region matches, then the whole pattern matches
51+
return true;
52+
}
53+
}
54+
55+
int strIndex = patternIndex;
56+
while (strIndex < str.length()) {
57+
assert pattern.charAt(patternIndex) == '*' : "Expected * at index " + patternIndex + " of [" + pattern + "]";
58+
59+
// skip over the '*'
60+
patternIndex++;
61+
62+
if (patternIndex == pattern.length()) {
63+
// The pattern ends in '*' (that is, "something*" or "*some*thing*", etc)
64+
// Since we already matched everything up to the '*' we know the string matches (whatever is left over must match '*')
65+
// so we're automatically done
66+
return true;
67+
}
68+
69+
// Look for the next '*'
70+
int nextStar = pattern.indexOf('*', patternIndex);
71+
while (nextStar == patternIndex) {
72+
// Two (or more) stars in sequence, just skip the subsequent ones
73+
patternIndex++;
74+
nextStar = pattern.indexOf('*', patternIndex);
75+
}
76+
if (nextStar == -1) {
77+
// We've come to the last '*' in a pattern (e.g. the 2nd one in "*some*thing")
78+
// In this case we match if the input string ends in "thing" (but constrained by the current position)
79+
final int len = pattern.length() - patternIndex;
80+
final int strSuffixStart = str.length() - len;
81+
if (strSuffixStart < strIndex) {
82+
// The suffix would start before the current position. That means it's not a match
83+
// e.g. "abc" is not a match for "ab*bc" even though "abc" does end with "bc"
84+
return false;
85+
}
86+
return str.regionMatches(strSuffixStart, pattern, patternIndex, len);
87+
} else {
88+
// There is another star, with a literal in between the current position and that '*'
89+
// That is, we have "*literal*"
90+
// We want the first '*' to consume everything up until the first occurrence of "literal" in the input string
91+
int match = str.indexOf(pattern.substring(patternIndex, nextStar), strIndex);
92+
if (match == -1) {
93+
// If "literal" isn't there, then the match fails.
94+
return false;
5295
}
53-
partIndex = str.indexOf(part, partIndex + 1);
96+
// Move both index (pointer) values to the end of the literal
97+
strIndex = match + (nextStar - patternIndex);
98+
patternIndex = nextStar;
5499
}
55-
return false;
56100
}
57-
return (str.length() >= firstIndex
58-
&& pattern.substring(0, firstIndex).equals(str.substring(0, firstIndex))
59-
&& globMatch(pattern.substring(firstIndex), str.substring(firstIndex)));
101+
102+
// We might have trailing '*'s in the pattern after completing a literal match at the end of the input string
103+
// e.g. a glob of "el*ic*" matching "elastic" - we need to consume that last '*' without it matching anything
104+
while (patternIndex < pattern.length() && pattern.charAt(patternIndex) == '*') {
105+
patternIndex++;
106+
}
107+
108+
// The match is successful only if we have consumed the entire pattern.
109+
return patternIndex == pattern.length();
60110
}
61111

62112
}
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.core;
11+
12+
import org.elasticsearch.test.ESTestCase;
13+
14+
import static org.hamcrest.Matchers.equalTo;
15+
import static org.hamcrest.Matchers.is;
16+
17+
public class GlobTests extends ESTestCase {
18+
19+
public void testMatchNull() {
20+
assertThat(Glob.globMatch(null, null), is(false));
21+
assertThat(Glob.globMatch(randomAlphaOfLengthBetween(1, 10), null), is(false));
22+
assertThat(Glob.globMatch(null, randomAlphaOfLengthBetween(1, 10)), is(false));
23+
}
24+
25+
public void testMatchLiteral() {
26+
assertMatch("", "");
27+
var str = randomAlphaOfLengthBetween(1, 12);
28+
assertMatch(str, str);
29+
30+
str = randomAlphanumericOfLength(randomIntBetween(1, 12));
31+
assertMatch(str, str);
32+
33+
str = randomAsciiString(randomIntBetween(1, 24), ch -> ch >= ' ' && ch <= '~' && ch != '*');
34+
assertMatch(str, str);
35+
}
36+
37+
public void testSingleAsterisk() {
38+
assertMatch("*", "");
39+
assertMatch("*", randomAlphaOfLengthBetween(1, 12));
40+
assertMatch("*", randomAlphanumericOfLength(randomIntBetween(1, 12)));
41+
assertMatch("*", randomAsciiString(randomIntBetween(1, 24), ch -> ch >= ' ' && ch <= '~'));
42+
assertMatch("*", "*".repeat(randomIntBetween(1, 5)));
43+
}
44+
45+
public void testMultipleConsecutiveAsterisk() {
46+
var pattern = "*".repeat(randomIntBetween(2, 5));
47+
48+
assertMatch(pattern, "");
49+
assertMatch(pattern, randomAlphaOfLengthBetween(1, 12));
50+
assertMatch(pattern, randomAlphanumericOfLength(randomIntBetween(1, 12)));
51+
assertMatch(pattern, randomAsciiString(randomIntBetween(1, 24)));
52+
assertMatch(pattern, "*".repeat(randomIntBetween(1, 5)));
53+
}
54+
55+
public void testPrefixMatch() {
56+
assertMatch("123*", "123");
57+
assertMatch("123*", "123abc");
58+
assertMatch("123*", "123123123");
59+
assertNonMatch("123*", "12");
60+
assertNonMatch("123*", "124");
61+
assertNonMatch("123*", "23");
62+
assertNonMatch("123*", "23x");
63+
assertNonMatch("123*", "x23");
64+
assertNonMatch("123*", "12*");
65+
assertNonMatch("123*", "12-3");
66+
assertNonMatch("123*", "1.2.3");
67+
assertNonMatch("123*", "abc123");
68+
assertNonMatch("123*", "abc123def");
69+
70+
var prefix = randomAsciiString(randomIntBetween(2, 12));
71+
var pattern = prefix + "*";
72+
assertMatch(pattern, prefix);
73+
assertMatch(pattern, prefix + randomAsciiString(randomIntBetween(1, 30)));
74+
assertNonMatch(
75+
pattern,
76+
randomValueOtherThanMany(s -> s.charAt(0) == prefix.charAt(0), () -> randomAsciiString(randomIntBetween(1, 30))) + prefix
77+
);
78+
assertNonMatch(pattern, prefix.substring(0, prefix.length() - 1));
79+
assertNonMatch(pattern, prefix.substring(1));
80+
}
81+
82+
public void testSuffixMatch() {
83+
assertMatch("*123", "123");
84+
assertMatch("*123", "abc123");
85+
assertMatch("*123", "123123123");
86+
assertNonMatch("*123", "12");
87+
assertNonMatch("*123", "x12");
88+
assertNonMatch("*123", "23");
89+
assertNonMatch("*123", "x23");
90+
assertNonMatch("*123", "12*");
91+
assertNonMatch("*123", "1.2.3");
92+
assertNonMatch("*123", "123abc");
93+
assertNonMatch("*123", "abc123def");
94+
95+
var suffix = randomAsciiString(randomIntBetween(2, 12));
96+
var pattern = "*" + suffix;
97+
assertMatch(pattern, suffix);
98+
assertMatch(pattern, randomAsciiString(randomIntBetween(1, 30)) + suffix);
99+
assertNonMatch(pattern, suffix + "#" + randomValueOtherThan(suffix, () -> randomAsciiString(randomIntBetween(1, 30))));
100+
assertNonMatch(pattern, suffix.substring(0, suffix.length() - 1));
101+
assertNonMatch(pattern, suffix.substring(1));
102+
}
103+
104+
public void testInfixStringMatch() {
105+
assertMatch("*123*", "abc123def");
106+
assertMatch("*123*", "abc123");
107+
assertMatch("*123*", "123def");
108+
assertMatch("*123*", "123");
109+
assertMatch("*123*", "123123123");
110+
assertMatch("*123*", "1.12.123.1234");
111+
assertNonMatch("*123*", "12");
112+
assertNonMatch("*123*", "23");
113+
assertNonMatch("*123*", "x23");
114+
assertNonMatch("*123*", "12*");
115+
assertNonMatch("*123*", "1.2.3");
116+
117+
var infix = randomAsciiString(randomIntBetween(2, 12));
118+
var pattern = "*" + infix + "*";
119+
assertMatch(pattern, infix);
120+
assertMatch(pattern, randomAsciiString(randomIntBetween(1, 30)) + infix + randomAsciiString(randomIntBetween(1, 30)));
121+
assertMatch(pattern, randomAsciiString(randomIntBetween(1, 30)) + infix);
122+
assertMatch(pattern, infix + randomAsciiString(randomIntBetween(1, 30)));
123+
assertNonMatch(pattern, infix.substring(0, infix.length() - 1));
124+
assertNonMatch(pattern, infix.substring(1));
125+
}
126+
127+
public void testInfixAsteriskMatch() {
128+
assertMatch("abc*xyz", "abcxyz");
129+
assertMatch("abc*xyz", "abc#xyz");
130+
assertMatch("abc*xyz", "abc*xyz");
131+
assertMatch("abc*xyz", "abcdefghijklmnopqrstuvwxyz");
132+
assertNonMatch("abc*xyz", "ABC.xyz");
133+
assertNonMatch("abc*xyz", "RabcSxyzT");
134+
assertNonMatch("abc*xyz", "RabcSxyz");
135+
assertNonMatch("abc*xyz", "abcSxyzT");
136+
137+
assertMatch("123*321", "123321");
138+
assertMatch("123*321", "12345678987654321");
139+
assertNonMatch("123*321", "12321");
140+
141+
var prefix = randomAsciiString(randomIntBetween(2, 12));
142+
var suffix = randomAsciiString(randomIntBetween(2, 12));
143+
var pattern = prefix + "*" + suffix;
144+
assertMatch(pattern, prefix + suffix);
145+
assertMatch(pattern, prefix + randomAsciiString(randomIntBetween(1, 30)) + suffix);
146+
assertNonMatch(pattern, prefix.substring(0, prefix.length() - 1) + suffix);
147+
assertNonMatch(pattern, prefix + suffix.substring(1));
148+
}
149+
150+
public void testLiteralSubstringMatching() {
151+
assertMatch("start*middle*end", "startmiddleend");
152+
assertMatch("start*middle*end", "start.middle.end");
153+
assertMatch("start*middle*end", "start.middlX.middle.end");
154+
assertMatch("start*middle*end", "start.middlmiddle.end");
155+
assertMatch("start*middle*end", "start.middle.eend");
156+
assertMatch("start*middle*end", "start.middle.enend");
157+
assertMatch("start*middle*end", "start.middle.endend");
158+
159+
assertNonMatch("start*middle*end", "startmiddlend");
160+
assertNonMatch("start*middle*end", "start.end");
161+
assertNonMatch("start*middle*end", "start+MIDDLE+end");
162+
assertNonMatch("start*middle*end", "start+mid+dle+end");
163+
assertNonMatch("start*middle*end", "start+mid+middle+en");
164+
}
165+
166+
private static void assertMatch(String pattern, String str) {
167+
assertThat("Expect [" + str + "] to match '" + pattern + "'", Glob.globMatch(pattern, str), is(true));
168+
}
169+
170+
private static void assertNonMatch(String pattern, String str) {
171+
assertThat("Expect [" + str + "] to not match '" + pattern + "'", Glob.globMatch(pattern, str), is(false));
172+
}
173+
174+
@FunctionalInterface
175+
interface CharPredicate {
176+
boolean test(char c);
177+
}
178+
179+
private String randomAsciiString(int length) {
180+
return randomAsciiString(length, ch -> ch >= ' ' && ch <= '~');
181+
}
182+
183+
private String randomAsciiString(int length, CharPredicate validCharacters) {
184+
StringBuilder str = new StringBuilder(length);
185+
nextChar: for (int i = 0; i < length; i++) {
186+
for (int attempts = 0; attempts < 200; attempts++) {
187+
char ch = (char) randomIntBetween(0x1, 0x7f);
188+
if (validCharacters.test(ch)) {
189+
str.append(ch);
190+
continue nextChar;
191+
}
192+
}
193+
throw new IllegalStateException("Cannot find valid character for string");
194+
}
195+
assertThat(str.length(), equalTo(length));
196+
return str.toString();
197+
}
198+
199+
}

0 commit comments

Comments
 (0)