diff --git a/libs/core/src/main/java/org/elasticsearch/core/Glob.java b/libs/core/src/main/java/org/elasticsearch/core/Glob.java index 8bc1f23c44bb5..b3e1106bd966d 100644 --- a/libs/core/src/main/java/org/elasticsearch/core/Glob.java +++ b/libs/core/src/main/java/org/elasticsearch/core/Glob.java @@ -29,34 +29,84 @@ public static boolean globMatch(String pattern, String str) { if (pattern == null || str == null) { return false; } - int firstIndex = pattern.indexOf('*'); - if (firstIndex == -1) { + + int patternIndex = pattern.indexOf('*'); + if (patternIndex == -1) { + // Nothing to glob return pattern.equals(str); } - if (firstIndex == 0) { + + if (patternIndex == 0) { + // If the pattern is a literal '*' then it matches any input if (pattern.length() == 1) { return true; } - int nextIndex = pattern.indexOf('*', firstIndex + 1); - if (nextIndex == -1) { - return str.endsWith(pattern.substring(1)); - } else if (nextIndex == 1) { - // Double wildcard "**" - skipping the first "*" - return globMatch(pattern.substring(1), str); + } else { + if (str.regionMatches(0, pattern, 0, patternIndex) == false) { + // If the pattern starts with a literal (i.e. not '*') then the input string must also start with that + return false; } - String part = pattern.substring(1, nextIndex); - int partIndex = str.indexOf(part); - while (partIndex != -1) { - if (globMatch(pattern.substring(nextIndex), str.substring(partIndex + part.length()))) { - return true; + if (patternIndex == pattern.length() - 1) { + // The pattern is "something*", so if the starting region matches, then the whole pattern matches + return true; + } + } + + int strIndex = patternIndex; + while (strIndex < str.length()) { + assert pattern.charAt(patternIndex) == '*' : "Expected * at index " + patternIndex + " of [" + pattern + "]"; + + // skip over the '*' + patternIndex++; + + if (patternIndex == pattern.length()) { + // The pattern ends in '*' (that is, "something*" or "*some*thing*", etc) + // Since we already matched everything up to the '*' we know the string matches (whatever is left over must match '*') + // so we're automatically done + return true; + } + + // Look for the next '*' + int nextStar = pattern.indexOf('*', patternIndex); + while (nextStar == patternIndex) { + // Two (or more) stars in sequence, just skip the subsequent ones + patternIndex++; + nextStar = pattern.indexOf('*', patternIndex); + } + if (nextStar == -1) { + // We've come to the last '*' in a pattern (.e.g the 2nd one in "*some*thing") + // In this case we match if the input string ends in "thing" (but constrained by the current position) + final int len = pattern.length() - patternIndex; + final int strSuffixStart = str.length() - len; + if (strSuffixStart < strIndex) { + // The suffix would start before the current position. That means it's not a match + // e.g. "abc" is not a match for "ab*bc" even though "abc" does end with "bc" + return false; + } + return str.regionMatches(strSuffixStart, pattern, patternIndex, len); + } else { + // There is another star, with a literal in between the current position and that '*' + // That is, we have "*literal*" + // We want the first '*' to consume everything up until the first occurrence of "literal" in the input string + int match = str.indexOf(pattern.substring(patternIndex, nextStar), strIndex); + if (match == -1) { + // If "literal" isn't there, then the match fails. + return false; } - partIndex = str.indexOf(part, partIndex + 1); + // Move both index (pointer) values to the end of the literal + strIndex = match + (nextStar - patternIndex); + patternIndex = nextStar; } - return false; } - return (str.length() >= firstIndex - && pattern.substring(0, firstIndex).equals(str.substring(0, firstIndex)) - && globMatch(pattern.substring(firstIndex), str.substring(firstIndex))); + + // We might have trailing '*'s in the pattern after completing a literal match at the end of the input string + // e.g. a glob of "el*ic*" matching "elastic" - we need to consume that last '*' without it matching anything + while (patternIndex < pattern.length() && pattern.charAt(patternIndex) == '*') { + patternIndex++; + } + + // The match is successful only if we have consumed the entire pattern. + return patternIndex == pattern.length(); } } diff --git a/libs/core/src/test/java/org/elasticsearch/core/GlobTests.java b/libs/core/src/test/java/org/elasticsearch/core/GlobTests.java new file mode 100644 index 0000000000000..f37dfb47c6591 --- /dev/null +++ b/libs/core/src/test/java/org/elasticsearch/core/GlobTests.java @@ -0,0 +1,199 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.core; + +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +public class GlobTests extends ESTestCase { + + public void testMatchNull() { + assertThat(Glob.globMatch(null, null), is(false)); + assertThat(Glob.globMatch(randomAlphaOfLengthBetween(1, 10), null), is(false)); + assertThat(Glob.globMatch(null, randomAlphaOfLengthBetween(1, 10)), is(false)); + } + + public void testMatchLiteral() { + assertMatch("", ""); + var str = randomAlphaOfLengthBetween(1, 12); + assertMatch(str, str); + + str = randomAlphanumericOfLength(randomIntBetween(1, 12)); + assertMatch(str, str); + + str = randomAsciiString(randomIntBetween(1, 24), ch -> ch >= ' ' && ch <= '~' && ch != '*'); + assertMatch(str, str); + } + + public void testSingleAsterisk() { + assertMatch("*", ""); + assertMatch("*", randomAlphaOfLengthBetween(1, 12)); + assertMatch("*", randomAlphanumericOfLength(randomIntBetween(1, 12))); + assertMatch("*", randomAsciiString(randomIntBetween(1, 24), ch -> ch >= ' ' && ch <= '~')); + assertMatch("*", "*".repeat(randomIntBetween(1, 5))); + } + + public void testMultipleConsecutiveAsterisk() { + var pattern = "*".repeat(randomIntBetween(2, 5)); + + assertMatch(pattern, ""); + assertMatch(pattern, randomAlphaOfLengthBetween(1, 12)); + assertMatch(pattern, randomAlphanumericOfLength(randomIntBetween(1, 12))); + assertMatch(pattern, randomAsciiString(randomIntBetween(1, 24))); + assertMatch(pattern, "*".repeat(randomIntBetween(1, 5))); + } + + public void testPrefixMatch() { + assertMatch("123*", "123"); + assertMatch("123*", "123abc"); + assertMatch("123*", "123123123"); + assertNonMatch("123*", "12"); + assertNonMatch("123*", "124"); + assertNonMatch("123*", "23"); + assertNonMatch("123*", "23x"); + assertNonMatch("123*", "x23"); + assertNonMatch("123*", "12*"); + assertNonMatch("123*", "12-3"); + assertNonMatch("123*", "1.2.3"); + assertNonMatch("123*", "abc123"); + assertNonMatch("123*", "abc123def"); + + var prefix = randomAsciiString(randomIntBetween(2, 12)); + var pattern = prefix + "*"; + assertMatch(pattern, prefix); + assertMatch(pattern, prefix + randomAsciiString(randomIntBetween(1, 30))); + assertNonMatch( + pattern, + randomValueOtherThanMany(s -> s.charAt(0) == prefix.charAt(0), () -> randomAsciiString(randomIntBetween(1, 30))) + prefix + ); + assertNonMatch(pattern, prefix.substring(0, prefix.length() - 1)); + assertNonMatch(pattern, prefix.substring(1)); + } + + public void testSuffixMatch() { + assertMatch("*123", "123"); + assertMatch("*123", "abc123"); + assertMatch("*123", "123123123"); + assertNonMatch("*123", "12"); + assertNonMatch("*123", "x12"); + assertNonMatch("*123", "23"); + assertNonMatch("*123", "x23"); + assertNonMatch("*123", "12*"); + assertNonMatch("*123", "1.2.3"); + assertNonMatch("*123", "123abc"); + assertNonMatch("*123", "abc123def"); + + var suffix = randomAsciiString(randomIntBetween(2, 12)); + var pattern = "*" + suffix; + assertMatch(pattern, suffix); + assertMatch(pattern, randomAsciiString(randomIntBetween(1, 30)) + suffix); + assertNonMatch(pattern, suffix + "#" + randomValueOtherThan(suffix, () -> randomAsciiString(randomIntBetween(1, 30)))); + assertNonMatch(pattern, suffix.substring(0, suffix.length() - 1)); + assertNonMatch(pattern, suffix.substring(1)); + } + + public void testInfixStringMatch() { + assertMatch("*123*", "abc123def"); + assertMatch("*123*", "abc123"); + assertMatch("*123*", "123def"); + assertMatch("*123*", "123"); + assertMatch("*123*", "123123123"); + assertMatch("*123*", "1.12.123.1234"); + assertNonMatch("*123*", "12"); + assertNonMatch("*123*", "23"); + assertNonMatch("*123*", "x23"); + assertNonMatch("*123*", "12*"); + assertNonMatch("*123*", "1.2.3"); + + var infix = randomAsciiString(randomIntBetween(2, 12)); + var pattern = "*" + infix + "*"; + assertMatch(pattern, infix); + assertMatch(pattern, randomAsciiString(randomIntBetween(1, 30)) + infix + randomAsciiString(randomIntBetween(1, 30))); + assertMatch(pattern, randomAsciiString(randomIntBetween(1, 30)) + infix); + assertMatch(pattern, infix + randomAsciiString(randomIntBetween(1, 30))); + assertNonMatch(pattern, infix.substring(0, infix.length() - 1)); + assertNonMatch(pattern, infix.substring(1)); + } + + public void testInfixAsteriskMatch() { + assertMatch("abc*xyz", "abcxyz"); + assertMatch("abc*xyz", "abc#xyz"); + assertMatch("abc*xyz", "abc*xyz"); + assertMatch("abc*xyz", "abcdefghijklmnopqrstuvwxyz"); + assertNonMatch("abc*xyz", "ABC.xyz"); + assertNonMatch("abc*xyz", "RabcSxyzT"); + assertNonMatch("abc*xyz", "RabcSxyz"); + assertNonMatch("abc*xyz", "abcSxyzT"); + + assertMatch("123*321", "123321"); + assertMatch("123*321", "12345678987654321"); + assertNonMatch("123*321", "12321"); + + var prefix = randomAsciiString(randomIntBetween(2, 12)); + var suffix = randomAsciiString(randomIntBetween(2, 12)); + var pattern = prefix + "*" + suffix; + assertMatch(pattern, prefix + suffix); + assertMatch(pattern, prefix + randomAsciiString(randomIntBetween(1, 30)) + suffix); + assertNonMatch(pattern, prefix.substring(0, prefix.length() - 1) + suffix); + assertNonMatch(pattern, prefix + suffix.substring(1)); + } + + public void testLiteralSubstringMatching() { + assertMatch("start*middle*end", "startmiddleend"); + assertMatch("start*middle*end", "start.middle.end"); + assertMatch("start*middle*end", "start.middlX.middle.end"); + assertMatch("start*middle*end", "start.middlmiddle.end"); + assertMatch("start*middle*end", "start.middle.eend"); + assertMatch("start*middle*end", "start.middle.enend"); + assertMatch("start*middle*end", "start.middle.endend"); + + assertNonMatch("start*middle*end", "startmiddlend"); + assertNonMatch("start*middle*end", "start.end"); + assertNonMatch("start*middle*end", "start+MIDDLE+end"); + assertNonMatch("start*middle*end", "start+mid+dle+end"); + assertNonMatch("start*middle*end", "start+mid+middle+en"); + } + + private static void assertMatch(String pattern, String str) { + assertThat("Expect [" + str + "] to match '" + pattern + "'", Glob.globMatch(pattern, str), is(true)); + } + + private static void assertNonMatch(String pattern, String str) { + assertThat("Expect [" + str + "] to not match '" + pattern + "'", Glob.globMatch(pattern, str), is(false)); + } + + @FunctionalInterface + interface CharPredicate { + boolean test(char c); + } + + private String randomAsciiString(int length) { + return randomAsciiString(length, ch -> ch >= ' ' && ch <= '~'); + } + + private String randomAsciiString(int length, CharPredicate validCharacters) { + StringBuilder str = new StringBuilder(length); + nextChar: for (int i = 0; i < length; i++) { + for (int attempts = 0; attempts < 200; attempts++) { + char ch = (char) randomIntBetween(0x1, 0x7f); + if (validCharacters.test(ch)) { + str.append(ch); + continue nextChar; + } + } + throw new IllegalStateException("Cannot find valid character for string"); + } + assertThat(str.length(), equalTo(length)); + return str.toString(); + } + +}