Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/128362.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 128362
summary: Avoid unnecessary determinization in index pattern conflict checks
area: Indices APIs
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -946,12 +946,15 @@ public static Map<String, List<String>> findConflictingV1Templates(
final String candidateName,
final List<String> indexPatterns
) {
Automaton v2automaton = Regex.simpleMatchToAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY));
// No need to determinize the automaton, as it is only used to check for intersection with another automaton.
// Determinization is avoided because it can fail or become very costly due to state explosion.
Automaton v2automaton = Regex.simpleMatchToNonDeterminizedAutomaton(indexPatterns.toArray(Strings.EMPTY_ARRAY));
Map<String, List<String>> overlappingTemplates = new HashMap<>();
for (Map.Entry<String, IndexTemplateMetadata> cursor : project.templates().entrySet()) {
String name = cursor.getKey();
IndexTemplateMetadata template = cursor.getValue();
Automaton v1automaton = Regex.simpleMatchToAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY));
// No need to determinize the automaton, as it is only used to check for intersection with another automaton.
Automaton v1automaton = Regex.simpleMatchToNonDeterminizedAutomaton(template.patterns().toArray(Strings.EMPTY_ARRAY));
if (Operations.isEmpty(Operations.intersection(v2automaton, v1automaton)) == false) {
logger.debug(
"composable template {} and legacy template {} would overlap: {} <=> {}",
Expand Down
39 changes: 32 additions & 7 deletions server/src/main/java/org/elasticsearch/common/regex/Regex.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,15 @@ public static boolean isSuffixWildcard(String str) {
return isSuffixMatchPattern(str) && str.endsWith(".*");
}

/** Return an {@link Automaton} that matches the given pattern. */
public static Automaton simpleMatchToAutomaton(String pattern) {
/**
* Return a non-determinized {@link Automaton} that matches the given pattern.
* WARNING: Use this method only when the resulting {@link Automaton} is used in contexts
* that do not require determinism (e.g., checking the intersection of automatons).
*
* For pattern matching with {@link CharacterRunAutomaton}, a deterministic automaton is required.
* In that case, use {@link Regex#simpleMatchToAutomaton} instead.
*/
public static Automaton simpleMatchToNonDeterminizedAutomaton(String pattern) {
List<Automaton> automata = new ArrayList<>();
int previous = 0;
for (int i = pattern.indexOf('*'); i != -1; i = pattern.indexOf('*', i + 1)) {
Expand All @@ -69,13 +76,24 @@ public static Automaton simpleMatchToAutomaton(String pattern) {
previous = i + 1;
}
automata.add(Automata.makeString(pattern.substring(previous)));
return Operations.determinize(Operations.concatenate(automata), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
return Operations.concatenate(automata);
}

/**
 * Return a deterministic {@link Automaton} that matches the given pattern.
 *
 * The automaton is first built without determinization by
 * {@link #simpleMatchToNonDeterminizedAutomaton(String)} and is then determinized with a work
 * budget of {@link Operations#DEFAULT_DETERMINIZE_WORK_LIMIT}.
 *
 * @param pattern the simple-match pattern to compile
 * @return a determinized automaton for {@code pattern}
 * @throws org.apache.lucene.util.automaton.TooComplexToDeterminizeException
 *         if determinization would exceed the default work limit
 */
public static Automaton simpleMatchToAutomaton(String pattern) {
    final Automaton nonDeterminized = simpleMatchToNonDeterminizedAutomaton(pattern);
    return Operations.determinize(nonDeterminized, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
}

/**
* Return an Automaton that matches the union of the provided patterns.
* Return a non-determinized {@link Automaton} that matches the union of the given patterns.
*
* WARNING: Use this method only when the resulting {@link Automaton} is used in contexts
* that do not require determinism (e.g., checking the intersection of automatons).
*
* For pattern matching with {@link CharacterRunAutomaton}, a deterministic automaton is required.
* In that case, use {@link Regex#simpleMatchToAutomaton} instead.
*/
public static Automaton simpleMatchToAutomaton(String... patterns) {
public static Automaton simpleMatchToNonDeterminizedAutomaton(String... patterns) {
if (patterns.length < 1) {
throw new IllegalArgumentException("There must be at least one pattern, zero given");
}
Expand All @@ -88,7 +106,7 @@ public static Automaton simpleMatchToAutomaton(String... patterns) {
if (isSuffixWildcard(pattern) && pattern.length() < 1000) {
prefixes.add(new BytesRef(pattern.substring(0, pattern.length() - 1)));
} else if (isSimpleMatchPattern(pattern) || pattern.length() >= 1000) {
automata.add(simpleMatchToAutomaton(pattern));
automata.add(simpleMatchToNonDeterminizedAutomaton(pattern));
} else {
simpleStrings.add(new BytesRef(pattern));
}
Expand All @@ -113,7 +131,14 @@ public static Automaton simpleMatchToAutomaton(String... patterns) {
prefixAutomaton.add(Automata.makeAnyString());
automata.add(Operations.concatenate(prefixAutomaton));
}
return Operations.determinize(Operations.union(automata), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
return Operations.union(automata);
}

/**
 * Return a deterministic {@link Automaton} that matches the union of the provided patterns.
 *
 * The union is first built without determinization by
 * {@link #simpleMatchToNonDeterminizedAutomaton(String...)} and is then determinized with a work
 * budget of {@link Operations#DEFAULT_DETERMINIZE_WORK_LIMIT}. Prefer the non-determinized
 * variant when the result is only used for operations that do not need determinism.
 *
 * @param patterns the simple-match patterns to combine
 * @return a determinized automaton accepting any input matched by at least one pattern
 * @throws org.apache.lucene.util.automaton.TooComplexToDeterminizeException
 *         if determinization would exceed the default work limit
 */
public static Automaton simpleMatchToAutomaton(String... patterns) {
    final Automaton nonDeterminizedUnion = simpleMatchToNonDeterminizedAutomaton(patterns);
    return Operations.determinize(nonDeterminizedUnion, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,20 @@

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
import org.elasticsearch.test.ESTestCase;

import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Random;
import java.util.function.Predicate;
import java.util.regex.Pattern;

import static org.elasticsearch.test.LambdaMatchers.falseWith;
import static org.elasticsearch.test.LambdaMatchers.trueWith;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;

public class RegexTests extends ESTestCase {
Expand Down Expand Up @@ -250,4 +254,14 @@ public void testThousandsAndLongPattern() throws IOException {
assertTrue(predicate.test(patterns[i]));
}
}

/**
 * Checks that non-determinized automata can be intersected, that they are rejected where a
 * deterministic automaton is required, and that determinizing the same patterns fails.
 */
public void testIntersectNonDeterminizedAutomaton() {
    // Between 20 and 100 patterns, each wrapping a random 10-character token in wildcards.
    String[] allPatterns = randomArray(20, 100, String[]::new, () -> "*" + randomAlphanumericOfLength(10) + "*");
    String[] secondHalf = Arrays.copyOfRange(allPatterns, allPatterns.length / 2, allPatterns.length);

    Automaton unionOfAll = Regex.simpleMatchToNonDeterminizedAutomaton(allPatterns);
    Automaton unionOfSecondHalf = Regex.simpleMatchToNonDeterminizedAutomaton(secondHalf);

    // The second automaton is built from a subset of the first one's patterns, so they must overlap.
    Automaton overlap = Operations.intersection(unionOfAll, unionOfSecondHalf);
    assertFalse(Operations.isEmpty(overlap));

    // Using the non-determinized automaton for matching is expected to be rejected.
    IllegalArgumentException rejection = expectThrows(
        IllegalArgumentException.class,
        () -> assertMatchesAll(unionOfAll, "my_test")
    );
    assertThat(rejection.getMessage(), containsString("deterministic"));

    // Determinizing the same pattern set is expected to exceed the work limit.
    expectThrows(TooComplexToDeterminizeException.class, () -> Regex.simpleMatchToAutomaton(allPatterns));
}
}
Loading