Skip to content

Commit e6e613c

Browse files
committed
Avoid unnecessary determinization in index pattern conflict checks
Starting with Lucene 10, `CharacterRunAutomaton` is no longer determinized automatically. In Elasticsearch 9, we adapted to this by eagerly determinizing automata early (via `Regex#simpleMatchToAutomaton`). However, this introduced a regression: operations like index template conflict checks, which only require intersection testing, now pay the cost of determinization—an expensive step that wasn’t needed before. In some cases, especially when many wildcard patterns are involved, determinization can even fail due to state explosion. This change removes the unnecessary determinization, restoring the pre-9.0 behavior and allowing valid index templates with many patterns to be registered again.
1 parent d7aebb8 commit e6e613c

File tree

3 files changed

+29
-5
lines changed

3 files changed

+29
-5
lines changed

server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -946,12 +946,15 @@ public static Map<String, List<String>> findConflictingV1Templates(
946946
final String candidateName,
947947
final List<String> indexPatterns
948948
) {
949-
Automaton v2automaton = Regex.simpleMatchToAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY));
949+
// No need to determinize the automaton, as it is only used to check for intersection with another automaton.
950+
// Determinization is avoided because it can fail or become very costly due to state explosion.
951+
Automaton v2automaton = Regex.simpleMatchToNonDeterminizedAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY));
950952
Map<String, List<String>> overlappingTemplates = new HashMap<>();
951953
for (Map.Entry<String, IndexTemplateMetadata> cursor : project.templates().entrySet()) {
952954
String name = cursor.getKey();
953955
IndexTemplateMetadata template = cursor.getValue();
954-
Automaton v1automaton = Regex.simpleMatchToAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY));
956+
// No need to determinize the automaton, as it is only used to check for intersection with another automaton.
957+
Automaton v1automaton = Regex.simpleMatchToNonDeterminizedAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY));
955958
if (Operations.isEmpty(Operations.intersection(v2automaton, v1automaton)) == false) {
956959
logger.debug(
957960
"composable template {} and legacy template {} would overlap: {} <=> {}",

server/src/main/java/org/elasticsearch/common/regex/Regex.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,9 @@ public static Automaton simpleMatchToAutomaton(String pattern) {
7373
}
7474

7575
/**
76-
* Return an Automaton that matches the union of the provided patterns.
76+
* Return a non-determinized Automaton that matches the union of the provided patterns.
7777
*/
78-
public static Automaton simpleMatchToAutomaton(String... patterns) {
78+
public static Automaton simpleMatchToNonDeterminizedAutomaton(String... patterns) {
7979
if (patterns.length < 1) {
8080
throw new IllegalArgumentException("There must be at least one pattern, zero given");
8181
}
@@ -113,7 +113,14 @@ public static Automaton simpleMatchToAutomaton(String... patterns) {
113113
prefixAutomaton.add(Automata.makeAnyString());
114114
automata.add(Operations.concatenate(prefixAutomaton));
115115
}
116-
return Operations.determinize(Operations.union(automata), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
116+
return Operations.union(automata);
117+
}
118+
119+
/**
120+
* Return a deterministic Automaton that matches the union of the provided patterns.
121+
*/
122+
public static Automaton simpleMatchToAutomaton(String... patterns) {
123+
return Operations.determinize(simpleMatchToNonDeterminizedAutomaton(patterns), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
117124
}
118125

119126
/**

server/src/test/java/org/elasticsearch/common/regex/RegexTests.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,20 @@
1010

1111
import org.apache.lucene.util.automaton.Automaton;
1212
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
13+
import org.apache.lucene.util.automaton.Operations;
14+
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
1315
import org.elasticsearch.test.ESTestCase;
1416

1517
import java.io.IOException;
18+
import java.util.Arrays;
1619
import java.util.Locale;
1720
import java.util.Random;
1821
import java.util.function.Predicate;
1922
import java.util.regex.Pattern;
2023

2124
import static org.elasticsearch.test.LambdaMatchers.falseWith;
2225
import static org.elasticsearch.test.LambdaMatchers.trueWith;
26+
import static org.hamcrest.Matchers.containsString;
2327
import static org.hamcrest.Matchers.equalTo;
2428

2529
public class RegexTests extends ESTestCase {
@@ -250,4 +254,14 @@ public void testThousandsAndLongPattern() throws IOException {
250254
assertTrue(predicate.test(patterns[i]));
251255
}
252256
}
257+
258+
public void testIntersectNonDeterminizedAutomaton() {
259+
String[] patterns = randomArray(20, 100, size -> new String[size], () -> "*" + randomAlphanumericOfLength(10) + "*");
260+
Automaton a = Regex.simpleMatchToNonDeterminizedAutomaton(patterns);
261+
Automaton b = Regex.simpleMatchToNonDeterminizedAutomaton(Arrays.copyOfRange(patterns, patterns.length / 2, patterns.length));
262+
assertFalse(Operations.isEmpty(Operations.intersection(a, b)));
263+
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> assertMatchesAll(a, "my_test"));
264+
assertThat(exc.getMessage(), containsString("deterministic"));
265+
expectThrows(TooComplexToDeterminizeException.class, () -> Regex.simpleMatchToAutomaton(patterns));
266+
}
253267
}

0 commit comments

Comments
 (0)