Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 69 additions & 19 deletions libs/core/src/main/java/org/elasticsearch/core/Glob.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,34 +29,84 @@ public static boolean globMatch(String pattern, String str) {
if (pattern == null || str == null) {
return false;
}
int firstIndex = pattern.indexOf('*');
if (firstIndex == -1) {

int patternIndex = pattern.indexOf('*');
if (patternIndex == -1) {
// Nothing to glob
return pattern.equals(str);
}
if (firstIndex == 0) {

if (patternIndex == 0) {
// If the pattern is a literal '*' then it matches any input
if (pattern.length() == 1) {
return true;
}
int nextIndex = pattern.indexOf('*', firstIndex + 1);
if (nextIndex == -1) {
return str.endsWith(pattern.substring(1));
} else if (nextIndex == 1) {
// Double wildcard "**" - skipping the first "*"
return globMatch(pattern.substring(1), str);
} else {
if (str.regionMatches(0, pattern, 0, patternIndex) == false) {
// If the pattern starts with a literal (i.e. not '*') then the input string must also start with that
return false;
}
String part = pattern.substring(1, nextIndex);
int partIndex = str.indexOf(part);
while (partIndex != -1) {
if (globMatch(pattern.substring(nextIndex), str.substring(partIndex + part.length()))) {
return true;
if (patternIndex == pattern.length() - 1) {
// The pattern is "something*", so if the starting region matches, then the whole pattern matches
return true;
}
}

int strIndex = patternIndex;
while (strIndex < str.length()) {
assert pattern.charAt(patternIndex) == '*' : "Expected * at index " + patternIndex + " of [" + pattern + "]";

// skip over the '*'
patternIndex++;

if (patternIndex == pattern.length()) {
// The pattern ends in '*' (that is, "something*" or "*some*thing*", etc)
// Since we already matched everything up to the '*' we know the string matches (whatever is left over must match '*')
// so we're automatically done
return true;
}

// Look for the next '*'
int nextStar = pattern.indexOf('*', patternIndex);
while (nextStar == patternIndex) {
// Two (or more) stars in sequence, just skip the subsequent ones
patternIndex++;
nextStar = pattern.indexOf('*', patternIndex);
}
if (nextStar == -1) {
// We've come to the last '*' in a pattern (.e.g the 2nd one in "*some*thing")
// In this case we match if the input string ends in "thing" (but constrained by the current position)
final int len = pattern.length() - patternIndex;
final int strSuffixStart = str.length() - len;
if (strSuffixStart < strIndex) {
// The suffix would start before the current position. That means it's not a match
// e.g. "abc" is not a match for "ab*bc" even though "abc" does end with "bc"
return false;
}
return str.regionMatches(strSuffixStart, pattern, patternIndex, len);
} else {
// There is another star, with a literal in between the current position and that '*'
// That is, we have "*literal*"
// We want the first '*' to consume everything up until the first occurrence of "literal" in the input string
int match = str.indexOf(pattern.substring(patternIndex, nextStar), strIndex);
Copy link
Member

@rjernst rjernst Aug 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could move this to a helper method that performs charAt itself using a range of indices instead of needing to construct a substring?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually tried that, but it was slower (almost an order of magnitude so).

I suspect that's becauseString.indexOf marked as @IntrinsicCandidate and the intrinsic indexOf is fast enough to offset the cost of the substring copy.

if (match == -1) {
// If "literal" isn't there, then the match fails.
return false;
}
partIndex = str.indexOf(part, partIndex + 1);
// Move both index (pointer) values to the end of the literal
strIndex = match + (nextStar - patternIndex);
patternIndex = nextStar;
}
return false;
}
return (str.length() >= firstIndex
&& pattern.substring(0, firstIndex).equals(str.substring(0, firstIndex))
&& globMatch(pattern.substring(firstIndex), str.substring(firstIndex)));

// We might have trailing '*'s in the pattern after completing a literal match at the end of the input string
// e.g. a glob of "el*ic*" matching "elastic" - we need to consume that last '*' without it matching anything
while (patternIndex < pattern.length() && pattern.charAt(patternIndex) == '*') {
patternIndex++;
}

// The match is successful only if we have consumed the entire pattern.
return patternIndex == pattern.length();
}

}
199 changes: 199 additions & 0 deletions libs/core/src/test/java/org/elasticsearch/core/GlobTests.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.core;

import org.elasticsearch.test.ESTestCase;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;

public class GlobTests extends ESTestCase {

public void testMatchNull() {
assertThat(Glob.globMatch(null, null), is(false));
assertThat(Glob.globMatch(randomAlphaOfLengthBetween(1, 10), null), is(false));
assertThat(Glob.globMatch(null, randomAlphaOfLengthBetween(1, 10)), is(false));
}

public void testMatchLiteral() {
assertMatch("", "");
var str = randomAlphaOfLengthBetween(1, 12);
assertMatch(str, str);

str = randomAlphanumericOfLength(randomIntBetween(1, 12));
assertMatch(str, str);

str = randomAsciiString(randomIntBetween(1, 24), ch -> ch >= ' ' && ch <= '~' && ch != '*');
assertMatch(str, str);
}

public void testSingleAsterisk() {
assertMatch("*", "");
assertMatch("*", randomAlphaOfLengthBetween(1, 12));
assertMatch("*", randomAlphanumericOfLength(randomIntBetween(1, 12)));
assertMatch("*", randomAsciiString(randomIntBetween(1, 24), ch -> ch >= ' ' && ch <= '~'));
assertMatch("*", "*".repeat(randomIntBetween(1, 5)));
}

public void testMultipleConsecutiveAsterisk() {
var pattern = "*".repeat(randomIntBetween(2, 5));

assertMatch(pattern, "");
assertMatch(pattern, randomAlphaOfLengthBetween(1, 12));
assertMatch(pattern, randomAlphanumericOfLength(randomIntBetween(1, 12)));
assertMatch(pattern, randomAsciiString(randomIntBetween(1, 24)));
assertMatch(pattern, "*".repeat(randomIntBetween(1, 5)));
}

public void testPrefixMatch() {
assertMatch("123*", "123");
assertMatch("123*", "123abc");
assertMatch("123*", "123123123");
assertNonMatch("123*", "12");
assertNonMatch("123*", "124");
assertNonMatch("123*", "23");
assertNonMatch("123*", "23x");
assertNonMatch("123*", "x23");
assertNonMatch("123*", "12*");
assertNonMatch("123*", "12-3");
assertNonMatch("123*", "1.2.3");
assertNonMatch("123*", "abc123");
assertNonMatch("123*", "abc123def");

var prefix = randomAsciiString(randomIntBetween(2, 12));
var pattern = prefix + "*";
assertMatch(pattern, prefix);
assertMatch(pattern, prefix + randomAsciiString(randomIntBetween(1, 30)));
assertNonMatch(
pattern,
randomValueOtherThanMany(s -> s.charAt(0) == prefix.charAt(0), () -> randomAsciiString(randomIntBetween(1, 30))) + prefix
);
assertNonMatch(pattern, prefix.substring(0, prefix.length() - 1));
assertNonMatch(pattern, prefix.substring(1));
}

public void testSuffixMatch() {
assertMatch("*123", "123");
assertMatch("*123", "abc123");
assertMatch("*123", "123123123");
assertNonMatch("*123", "12");
assertNonMatch("*123", "x12");
assertNonMatch("*123", "23");
assertNonMatch("*123", "x23");
assertNonMatch("*123", "12*");
assertNonMatch("*123", "1.2.3");
assertNonMatch("*123", "123abc");
assertNonMatch("*123", "abc123def");

var suffix = randomAsciiString(randomIntBetween(2, 12));
var pattern = "*" + suffix;
assertMatch(pattern, suffix);
assertMatch(pattern, randomAsciiString(randomIntBetween(1, 30)) + suffix);
assertNonMatch(pattern, suffix + "#" + randomValueOtherThan(suffix, () -> randomAsciiString(randomIntBetween(1, 30))));
assertNonMatch(pattern, suffix.substring(0, suffix.length() - 1));
assertNonMatch(pattern, suffix.substring(1));
}

public void testInfixStringMatch() {
assertMatch("*123*", "abc123def");
assertMatch("*123*", "abc123");
assertMatch("*123*", "123def");
assertMatch("*123*", "123");
assertMatch("*123*", "123123123");
assertMatch("*123*", "1.12.123.1234");
assertNonMatch("*123*", "12");
assertNonMatch("*123*", "23");
assertNonMatch("*123*", "x23");
assertNonMatch("*123*", "12*");
assertNonMatch("*123*", "1.2.3");

var infix = randomAsciiString(randomIntBetween(2, 12));
var pattern = "*" + infix + "*";
assertMatch(pattern, infix);
assertMatch(pattern, randomAsciiString(randomIntBetween(1, 30)) + infix + randomAsciiString(randomIntBetween(1, 30)));
assertMatch(pattern, randomAsciiString(randomIntBetween(1, 30)) + infix);
assertMatch(pattern, infix + randomAsciiString(randomIntBetween(1, 30)));
assertNonMatch(pattern, infix.substring(0, infix.length() - 1));
assertNonMatch(pattern, infix.substring(1));
}

public void testInfixAsteriskMatch() {
assertMatch("abc*xyz", "abcxyz");
assertMatch("abc*xyz", "abc#xyz");
assertMatch("abc*xyz", "abc*xyz");
assertMatch("abc*xyz", "abcdefghijklmnopqrstuvwxyz");
assertNonMatch("abc*xyz", "ABC.xyz");
assertNonMatch("abc*xyz", "RabcSxyzT");
assertNonMatch("abc*xyz", "RabcSxyz");
assertNonMatch("abc*xyz", "abcSxyzT");

assertMatch("123*321", "123321");
assertMatch("123*321", "12345678987654321");
assertNonMatch("123*321", "12321");

var prefix = randomAsciiString(randomIntBetween(2, 12));
var suffix = randomAsciiString(randomIntBetween(2, 12));
var pattern = prefix + "*" + suffix;
assertMatch(pattern, prefix + suffix);
assertMatch(pattern, prefix + randomAsciiString(randomIntBetween(1, 30)) + suffix);
assertNonMatch(pattern, prefix.substring(0, prefix.length() - 1) + suffix);
assertNonMatch(pattern, prefix + suffix.substring(1));
}

public void testLiteralSubstringMatching() {
assertMatch("start*middle*end", "startmiddleend");
assertMatch("start*middle*end", "start.middle.end");
assertMatch("start*middle*end", "start.middlX.middle.end");
assertMatch("start*middle*end", "start.middlmiddle.end");
assertMatch("start*middle*end", "start.middle.eend");
assertMatch("start*middle*end", "start.middle.enend");
assertMatch("start*middle*end", "start.middle.endend");

assertNonMatch("start*middle*end", "startmiddlend");
assertNonMatch("start*middle*end", "start.end");
assertNonMatch("start*middle*end", "start+MIDDLE+end");
assertNonMatch("start*middle*end", "start+mid+dle+end");
assertNonMatch("start*middle*end", "start+mid+middle+en");
}

private static void assertMatch(String pattern, String str) {
assertThat("Expect [" + str + "] to match '" + pattern + "'", Glob.globMatch(pattern, str), is(true));
}

private static void assertNonMatch(String pattern, String str) {
assertThat("Expect [" + str + "] to not match '" + pattern + "'", Glob.globMatch(pattern, str), is(false));
}

@FunctionalInterface
interface CharPredicate {
boolean test(char c);
}

private String randomAsciiString(int length) {
return randomAsciiString(length, ch -> ch >= ' ' && ch <= '~');
}

private String randomAsciiString(int length, CharPredicate validCharacters) {
StringBuilder str = new StringBuilder(length);
nextChar: for (int i = 0; i < length; i++) {
for (int attempts = 0; attempts < 200; attempts++) {
char ch = (char) randomIntBetween(0x1, 0x7f);
if (validCharacters.test(ch)) {
str.append(ch);
continue nextChar;
}
}
throw new IllegalStateException("Cannot find valid character for string");
}
assertThat(str.length(), equalTo(length));
return str.toString();
}

}