From 5c1a12b82b8e3d6d9320622f17528e35a9b2c0c9 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Mon, 2 Jun 2025 10:39:35 +0200 Subject: [PATCH] Avoid unnecessary determinization in index pattern conflict checks (#128362) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with Lucene 10, `CharacterRunAutomaton` is no longer determinized automatically. In Elasticsearch 9, we adapted to this by eagerly determinizing automatons (via `Regex#simpleMatchToAutomaton`). However, this introduced a regression: operations like index template conflict checks, which only require intersection testing, now pay the cost of determinization—an expensive step that wasn’t needed before. In some cases, especially when many wildcard patterns are involved, determinization can even fail due to state explosion. This change removes the unnecessary determinization, restoring the pre-9.0 behavior and allowing valid index templates with many patterns to be registered again. 
--- docs/changelog/128362.yaml | 5 +++ .../MetadataIndexTemplateService.java | 7 +++- .../org/elasticsearch/common/regex/Regex.java | 39 +++++++++++++++---- .../common/regex/RegexTests.java | 18 +++++++++ 4 files changed, 60 insertions(+), 9 deletions(-) create mode 100644 docs/changelog/128362.yaml diff --git a/docs/changelog/128362.yaml b/docs/changelog/128362.yaml new file mode 100644 index 0000000000000..89a59a18f645a --- /dev/null +++ b/docs/changelog/128362.yaml @@ -0,0 +1,5 @@ +pr: 128362 +summary: Avoid unnecessary determinization in index pattern conflict checks +area: Indices APIs +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java index c8534f16ebeba..a9798022afbea 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java @@ -901,12 +901,15 @@ public static Map> findConflictingV1Templates( final String candidateName, final List indexPatterns ) { - Automaton v2automaton = Regex.simpleMatchToAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY)); + // No need to determinize the automaton, as it is only used to check for intersection with another automaton. + // Determinization is avoided because it can fail or become very costly due to state explosion. 
+ Automaton v2automaton = Regex.simpleMatchToNonDeterminizedAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY)); Map> overlappingTemplates = new HashMap<>(); for (Map.Entry cursor : state.metadata().templates().entrySet()) { String name = cursor.getKey(); IndexTemplateMetadata template = cursor.getValue(); - Automaton v1automaton = Regex.simpleMatchToAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY)); + // No need to determinize the automaton, as it is only used to check for intersection with another automaton. + Automaton v1automaton = Regex.simpleMatchToNonDeterminizedAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY)); if (Operations.isEmpty(Operations.intersection(v2automaton, v1automaton)) == false) { logger.debug( "composable template {} and legacy template {} would overlap: {} <=> {}", diff --git a/server/src/main/java/org/elasticsearch/common/regex/Regex.java b/server/src/main/java/org/elasticsearch/common/regex/Regex.java index aaaab78b71736..4b7de8787b683 100644 --- a/server/src/main/java/org/elasticsearch/common/regex/Regex.java +++ b/server/src/main/java/org/elasticsearch/common/regex/Regex.java @@ -59,8 +59,15 @@ public static boolean isSuffixWildcard(String str) { return isSuffixMatchPattern(str) && str.endsWith(".*"); } - /** Return an {@link Automaton} that matches the given pattern. */ - public static Automaton simpleMatchToAutomaton(String pattern) { + /** + * Return a non-determinized {@link Automaton} that matches the given pattern. + * WARNING: Use this method only when the resulting {@link Automaton} is used in contexts + * that do not require determinism (e.g., checking the intersection of automatons). + * + * For pattern matching with {@link CharacterRunAutomaton}, a deterministic automaton is required. + * In that case, use {@link Regex#simpleMatchToAutomaton} instead. 
+ */ + public static Automaton simpleMatchToNonDeterminizedAutomaton(String pattern) { List automata = new ArrayList<>(); int previous = 0; for (int i = pattern.indexOf('*'); i != -1; i = pattern.indexOf('*', i + 1)) { @@ -69,13 +76,24 @@ public static Automaton simpleMatchToAutomaton(String pattern) { previous = i + 1; } automata.add(Automata.makeString(pattern.substring(previous))); - return Operations.determinize(Operations.concatenate(automata), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); + return Operations.concatenate(automata); + } + + /** Return a deterministic {@link Automaton} that matches the given pattern. */ + public static Automaton simpleMatchToAutomaton(String pattern) { + return Operations.determinize(simpleMatchToNonDeterminizedAutomaton(pattern), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); } /** - * Return an Automaton that matches the union of the provided patterns. + * Returns a non-deterministic {@link Automaton} that matches the union of the given patterns. + * + * WARNING: Use this method only when the resulting {@link Automaton} is used in contexts + * that do not require determinism (e.g., checking the intersection of automatons). + * + * For pattern matching with {@link CharacterRunAutomaton}, a deterministic automaton is required. + * In that case, use {@link Regex#simpleMatchToAutomaton} instead. */ - public static Automaton simpleMatchToAutomaton(String... patterns) { + public static Automaton simpleMatchToNonDeterminizedAutomaton(String... patterns) { if (patterns.length < 1) { throw new IllegalArgumentException("There must be at least one pattern, zero given"); } @@ -88,7 +106,7 @@ public static Automaton simpleMatchToAutomaton(String... 
patterns) { if (isSuffixWildcard(pattern) && pattern.length() < 1000) { prefixes.add(new BytesRef(pattern.substring(0, pattern.length() - 1))); } else if (isSimpleMatchPattern(pattern) || pattern.length() >= 1000) { - automata.add(simpleMatchToAutomaton(pattern)); + automata.add(simpleMatchToNonDeterminizedAutomaton(pattern)); } else { simpleStrings.add(new BytesRef(pattern)); } @@ -113,7 +131,14 @@ public static Automaton simpleMatchToAutomaton(String... patterns) { prefixAutomaton.add(Automata.makeAnyString()); automata.add(Operations.concatenate(prefixAutomaton)); } - return Operations.determinize(Operations.union(automata), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); + return Operations.union(automata); + } + + /** + * Return a deterministic Automaton that matches the union of the provided patterns. + */ + public static Automaton simpleMatchToAutomaton(String... patterns) { + return Operations.determinize(simpleMatchToNonDeterminizedAutomaton(patterns), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT); } /** diff --git a/server/src/test/java/org/elasticsearch/common/regex/RegexTests.java b/server/src/test/java/org/elasticsearch/common/regex/RegexTests.java index e0b9da5eb455d..ab5b315da8490 100644 --- a/server/src/test/java/org/elasticsearch/common/regex/RegexTests.java +++ b/server/src/test/java/org/elasticsearch/common/regex/RegexTests.java @@ -10,9 +10,12 @@ import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; import org.elasticsearch.test.ESTestCase; import java.io.IOException; +import java.util.Arrays; import java.util.Locale; import java.util.Random; import java.util.function.Predicate; @@ -20,6 +23,7 @@ import static org.elasticsearch.test.LambdaMatchers.falseWith; import static org.elasticsearch.test.LambdaMatchers.trueWith; +import static 
org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; public class RegexTests extends ESTestCase { @@ -250,4 +254,18 @@ public void testThousandsAndLongPattern() throws IOException { assertTrue(predicate.test(patterns[i])); } } + + public void testIntersectNonDeterminizedAutomaton() { + // patterns too complex to determinize within the default limit + String[] patterns = randomArray(20, 100, size -> new String[size], () -> "*" + randomAlphanumericOfLength(10) + "*"); + Automaton a = Regex.simpleMatchToNonDeterminizedAutomaton(patterns); + assertFalse(a.isDeterministic()); + Automaton b = Regex.simpleMatchToNonDeterminizedAutomaton(Arrays.copyOfRange(patterns, patterns.length / 2, patterns.length)); + assertFalse(b.isDeterministic()); + assertFalse(Operations.isEmpty(Operations.intersection(a, b))); + IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> assertMatchesAll(a, "my_test")); + // the run automaton expects a deterministic automaton + assertThat(exc.getMessage(), containsString("deterministic")); + expectThrows(TooComplexToDeterminizeException.class, () -> Regex.simpleMatchToAutomaton(patterns)); + } }