Skip to content

Commit c0342b0

Browse files
MattAlp and elasticsearchmachine authored
Catch-and-rethrow TooComplexToDeterminizeException within ESQL (#137024)
--------- Co-authored-by: elasticsearchmachine <[email protected]>
1 parent e949036 commit c0342b0

File tree

12 files changed

+89
-12
lines changed

12 files changed

+89
-12
lines changed

docs/changelog/137024.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
pr: 137024
2+
summary: Catch-and-rethrow `TooComplexToDeterminizeException` within ESQL
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/AbstractStringPattern.java

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,21 @@
1111
import org.apache.lucene.util.UnicodeUtil;
1212
import org.apache.lucene.util.automaton.Automaton;
1313
import org.apache.lucene.util.automaton.Operations;
14+
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
1415

1516
public abstract class AbstractStringPattern implements StringPattern {
1617

1718
private Automaton automaton;
1819

19-
public abstract Automaton createAutomaton(boolean ignoreCase);
20+
public final Automaton createAutomaton(boolean ignoreCase) {
21+
try {
22+
return doCreateAutomaton(ignoreCase);
23+
} catch (TooComplexToDeterminizeException e) {
24+
throw new IllegalArgumentException("Pattern was too complex to determinize", e);
25+
}
26+
}
27+
28+
protected abstract Automaton doCreateAutomaton(boolean ignoreCase);
2029

2130
private Automaton automaton() {
2231
if (automaton == null) {

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RLikePattern.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ public void writeTo(StreamOutput out) throws IOException {
3434
}
3535

3636
@Override
37-
public Automaton createAutomaton(boolean ignoreCase) {
37+
protected Automaton doCreateAutomaton(boolean ignoreCase) {
3838
int matchFlags = ignoreCase ? RegExp.CASE_INSENSITIVE : 0;
3939
return Operations.determinize(
4040
new RegExp(regexpPattern, RegExp.ALL | RegExp.DEPRECATED_COMPLEMENT, matchFlags).toAutomaton(),

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RLikePatternList.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ public List<RLikePattern> patternList() {
4343
* We create a single automaton that is the union of all individual automatons to improve performance
4444
*/
4545
@Override
46-
public Automaton createAutomaton(boolean ignoreCase) {
46+
protected Automaton doCreateAutomaton(boolean ignoreCase) {
4747
List<Automaton> automatonList = patternList.stream().map(x -> x.createAutomaton(ignoreCase)).toList();
4848
Automaton result = Operations.union(automatonList);
4949
return Operations.determinize(result, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/WildcardPattern.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ public String pattern() {
5353
}
5454

5555
@Override
56-
public Automaton createAutomaton(boolean ignoreCase) {
56+
protected Automaton doCreateAutomaton(boolean ignoreCase) {
5757
return ignoreCase
5858
? Operations.determinize(
5959
new RegExp(luceneWildcardToRegExp(wildcard), RegExp.ALL | RegExp.DEPRECATED_COMPLEMENT, RegExp.CASE_INSENSITIVE)

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/WildcardPatternList.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ public List<WildcardPattern> patternList() {
5151
* We create a single automaton that is the union of all individual automata to improve performance
5252
*/
5353
@Override
54-
public Automaton createAutomaton(boolean ignoreCase) {
54+
protected Automaton doCreateAutomaton(boolean ignoreCase) {
5555
List<Automaton> automatonList = patternList.stream().map(x -> x.createAutomaton(ignoreCase)).toList();
5656
Automaton result = Operations.union(automatonList);
5757
return Operations.determinize(result, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);

x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/StringPatternTests.java

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,4 +106,12 @@ public void testRegexExactMatchWithEmptyMatch() {
106106
assertNotNull(exactMatchRLike("#|foo"));
107107
assertNotNull(exactMatchRLike("foo|#"));
108108
}
109+
110+
public void testTooComplexPattern() {
111+
var e = expectThrows(IllegalArgumentException.class, () -> rlike("(a|b)*a(a|b){13}").createAutomaton(false));
112+
assertEquals("Pattern was too complex to determinize", e.getMessage());
113+
114+
e = expectThrows(IllegalArgumentException.class, () -> like("*a?????????????").createAutomaton(false));
115+
assertEquals("Pattern was too complex to determinize", e.getMessage());
116+
}
109117
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1579,6 +1579,11 @@ public enum Cap {
15791579
*/
15801580
FORBID_LIMIT_BEFORE_INLINE_STATS(INLINE_STATS.enabled),
15811581

1582+
/**
1583+
* Catch-and-rethrow determinization complexity errors as 400s rather than 500s
1584+
*/
1585+
HANDLE_DETERMINIZATION_COMPLEXITY,
1586+
15821587
/**
15831588
* Support for the TRANGE function
15841589
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AutomataMatch.java

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
import org.apache.lucene.util.automaton.Automaton;
1212
import org.apache.lucene.util.automaton.ByteRunAutomaton;
1313
import org.apache.lucene.util.automaton.Operations;
14+
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
1415
import org.apache.lucene.util.automaton.Transition;
1516
import org.apache.lucene.util.automaton.UTF32ToUTF8;
1617
import org.elasticsearch.compute.ann.Evaluator;
@@ -36,7 +37,13 @@ public static EvalOperator.ExpressionEvaluator.Factory toEvaluator(
3637
* ByteRunAutomaton has a way to convert utf32 to utf8, but if we used it
3738
* we couldn’t get a nice toDot - so we call UTF32ToUTF8 ourselves.
3839
*/
39-
Automaton automaton = Operations.determinize(new UTF32ToUTF8().convert(utf32Automaton), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
40+
Automaton automaton;
41+
try {
42+
automaton = Operations.determinize(new UTF32ToUTF8().convert(utf32Automaton), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
43+
} catch (TooComplexToDeterminizeException e) {
44+
throw new IllegalArgumentException("Pattern was too complex to determinize", e);
45+
}
46+
4047
ByteRunAutomaton run = new ByteRunAutomaton(automaton, true);
4148
return new AutomataMatchEvaluator.Factory(source, field, run, toDot(automaton));
4249
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/ExpressionBuilder.java

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
import org.apache.lucene.util.automaton.Automaton;
1616
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
1717
import org.apache.lucene.util.automaton.Operations;
18+
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
1819
import org.elasticsearch.common.Strings;
1920
import org.elasticsearch.common.lucene.BytesRefs;
2021
import org.elasticsearch.common.regex.Regex;
@@ -485,12 +486,18 @@ public NamedExpression visitQualifiedNamePattern(EsqlBaseParser.QualifiedNamePat
485486
list.add(o instanceof Automaton a ? a : Automata.makeString(o.toString()));
486487
}
487488
// use the fast run variant
488-
result = new UnresolvedNamePattern(
489-
src,
490-
new CharacterRunAutomaton(Operations.determinize(Operations.concatenate(list), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT)),
491-
patternString.toString(),
492-
nameString.toString()
493-
);
489+
try {
490+
result = new UnresolvedNamePattern(
491+
src,
492+
new CharacterRunAutomaton(
493+
Operations.determinize(Operations.concatenate(list), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT)
494+
),
495+
patternString.toString(),
496+
nameString.toString()
497+
);
498+
} catch (TooComplexToDeterminizeException e) {
499+
throw new ParsingException("Pattern was too complex to determinize", e);
500+
}
494501

495502
if (qualifier != null) {
496503
throw qualifiersUnsupportedInPatterns(src, qualifiedCtx.getText());

0 commit comments

Comments
 (0)