Skip to content

Commit 95d7014

Browse files
committed
Merge remote-tracking branch 'upstream/main' into include_vectors_source_option
2 parents 74182d5 + 8312613 commit 95d7014

File tree

4 files changed

+60
-9
lines changed

4 files changed

+60
-9
lines changed

docs/changelog/128362.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128362
2+
summary: Avoid unnecessary determinization in index pattern conflict checks
3+
area: Indices APIs
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -946,12 +946,15 @@ public static Map<String, List<String>> findConflictingV1Templates(
946946
final String candidateName,
947947
final List<String> indexPatterns
948948
) {
949-
Automaton v2automaton = Regex.simpleMatchToAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY));
949+
// No need to determinize the automaton, as it is only used to check for intersection with another automaton.
950+
// Determinization is avoided because it can fail or become very costly due to state explosion.
951+
Automaton v2automaton = Regex.simpleMatchToNonDeterminizedAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY));
950952
Map<String, List<String>> overlappingTemplates = new HashMap<>();
951953
for (Map.Entry<String, IndexTemplateMetadata> cursor : project.templates().entrySet()) {
952954
String name = cursor.getKey();
953955
IndexTemplateMetadata template = cursor.getValue();
954-
Automaton v1automaton = Regex.simpleMatchToAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY));
956+
// No need to determinize the automaton, as it is only used to check for intersection with another automaton.
957+
Automaton v1automaton = Regex.simpleMatchToNonDeterminizedAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY));
955958
if (Operations.isEmpty(Operations.intersection(v2automaton, v1automaton)) == false) {
956959
logger.debug(
957960
"composable template {} and legacy template {} would overlap: {} <=> {}",

server/src/main/java/org/elasticsearch/common/regex/Regex.java

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,15 @@ public static boolean isSuffixWildcard(String str) {
5959
return isSuffixMatchPattern(str) && str.endsWith(".*");
6060
}
6161

62-
/** Return an {@link Automaton} that matches the given pattern. */
63-
public static Automaton simpleMatchToAutomaton(String pattern) {
62+
/**
63+
* Return a non-determinized {@link Automaton} that matches the given pattern.
64+
* WARNING: Use this method only when the resulting {@link Automaton} is used in contexts
65+
* that do not require determinism (e.g., checking the intersection of automata).
66+
*
67+
* For pattern matching with {@link CharacterRunAutomaton}, a deterministic automaton is required.
68+
* In that case, use {@link Regex#simpleMatchToAutomaton} instead.
69+
*/
70+
public static Automaton simpleMatchToNonDeterminizedAutomaton(String pattern) {
6471
List<Automaton> automata = new ArrayList<>();
6572
int previous = 0;
6673
for (int i = pattern.indexOf('*'); i != -1; i = pattern.indexOf('*', i + 1)) {
@@ -69,13 +76,24 @@ public static Automaton simpleMatchToAutomaton(String pattern) {
6976
previous = i + 1;
7077
}
7178
automata.add(Automata.makeString(pattern.substring(previous)));
72-
return Operations.determinize(Operations.concatenate(automata), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
79+
return Operations.concatenate(automata);
80+
}
81+
82+
/** Return a deterministic {@link Automaton} that matches the given pattern. */
83+
public static Automaton simpleMatchToAutomaton(String pattern) {
84+
return Operations.determinize(simpleMatchToNonDeterminizedAutomaton(pattern), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
7385
}
7486

7587
/**
76-
* Return an Automaton that matches the union of the provided patterns.
88+
* Return a non-determinized {@link Automaton} that matches the union of the given patterns.
89+
*
90+
* WARNING: Use this method only when the resulting {@link Automaton} is used in contexts
91+
* that do not require determinism (e.g., checking the intersection of automata).
92+
*
93+
* For pattern matching with {@link CharacterRunAutomaton}, a deterministic automaton is required.
94+
* In that case, use {@link Regex#simpleMatchToAutomaton} instead.
7795
*/
78-
public static Automaton simpleMatchToAutomaton(String... patterns) {
96+
public static Automaton simpleMatchToNonDeterminizedAutomaton(String... patterns) {
7997
if (patterns.length < 1) {
8098
throw new IllegalArgumentException("There must be at least one pattern, zero given");
8199
}
@@ -88,7 +106,7 @@ public static Automaton simpleMatchToAutomaton(String... patterns) {
88106
if (isSuffixWildcard(pattern) && pattern.length() < 1000) {
89107
prefixes.add(new BytesRef(pattern.substring(0, pattern.length() - 1)));
90108
} else if (isSimpleMatchPattern(pattern) || pattern.length() >= 1000) {
91-
automata.add(simpleMatchToAutomaton(pattern));
109+
automata.add(simpleMatchToNonDeterminizedAutomaton(pattern));
92110
} else {
93111
simpleStrings.add(new BytesRef(pattern));
94112
}
@@ -113,7 +131,14 @@ public static Automaton simpleMatchToAutomaton(String... patterns) {
113131
prefixAutomaton.add(Automata.makeAnyString());
114132
automata.add(Operations.concatenate(prefixAutomaton));
115133
}
116-
return Operations.determinize(Operations.union(automata), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
134+
return Operations.union(automata);
135+
}
136+
137+
/**
138+
* Return a deterministic Automaton that matches the union of the provided patterns.
139+
*/
140+
public static Automaton simpleMatchToAutomaton(String... patterns) {
141+
return Operations.determinize(simpleMatchToNonDeterminizedAutomaton(patterns), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
117142
}
118143

119144
/**

server/src/test/java/org/elasticsearch/common/regex/RegexTests.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,20 @@
1010

1111
import org.apache.lucene.util.automaton.Automaton;
1212
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
13+
import org.apache.lucene.util.automaton.Operations;
14+
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
1315
import org.elasticsearch.test.ESTestCase;
1416

1517
import java.io.IOException;
18+
import java.util.Arrays;
1619
import java.util.Locale;
1720
import java.util.Random;
1821
import java.util.function.Predicate;
1922
import java.util.regex.Pattern;
2023

2124
import static org.elasticsearch.test.LambdaMatchers.falseWith;
2225
import static org.elasticsearch.test.LambdaMatchers.trueWith;
26+
import static org.hamcrest.Matchers.containsString;
2327
import static org.hamcrest.Matchers.equalTo;
2428

2529
public class RegexTests extends ESTestCase {
@@ -250,4 +254,18 @@ public void testThousandsAndLongPattern() throws IOException {
250254
assertTrue(predicate.test(patterns[i]));
251255
}
252256
}
257+
258+
public void testIntersectNonDeterminizedAutomaton() {
259+
// patterns too complex to determinize within the default limit
260+
String[] patterns = randomArray(20, 100, size -> new String[size], () -> "*" + randomAlphanumericOfLength(10) + "*");
261+
Automaton a = Regex.simpleMatchToNonDeterminizedAutomaton(patterns);
262+
assertFalse(a.isDeterministic());
263+
Automaton b = Regex.simpleMatchToNonDeterminizedAutomaton(Arrays.copyOfRange(patterns, patterns.length / 2, patterns.length));
264+
assertFalse(b.isDeterministic());
265+
assertFalse(Operations.isEmpty(Operations.intersection(a, b)));
266+
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> assertMatchesAll(a, "my_test"));
267+
// the run automaton expects a deterministic automaton
268+
assertThat(exc.getMessage(), containsString("deterministic"));
269+
expectThrows(TooComplexToDeterminizeException.class, () -> Regex.simpleMatchToAutomaton(patterns));
270+
}
253271
}

0 commit comments

Comments
 (0)