Skip to content

Commit cc2370c

Browse files
Multi term intervals: increase max_expansions
Currently, multi-term interval queries (prefix, wildcard, fuzzy, regexp and range) can expand to a maximum of 128 terms. To reach parity with the span queries that we want to deprecate, set the maximum number of expansions to indices.query.bool.max_clause_count, the same limit that span queries use. Relates to #110491
1 parent ff74c90 commit cc2370c

File tree

4 files changed

+95
-33
lines changed

4 files changed

+95
-33
lines changed

docs/reference/query-dsl/intervals-query.asciidoc

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,9 @@ unstemmed ones.
124124
==== `prefix` rule parameters
125125

126126
The `prefix` rule matches terms that start with a specified set of characters.
127-
This prefix can expand to match at most 128 terms. If the prefix matches more
128-
than 128 terms, {es} returns an error. You can use the
127+
This prefix can expand to match at most `indices.query.bool.max_clause_count`
128+
<<search-settings,search setting>> terms. If the prefix matches more terms,
129+
{es} returns an error. You can use the
129130
<<index-prefixes,`index-prefixes`>> option in the field mapping to avoid this
130131
limit.
131132

@@ -151,7 +152,8 @@ separate `analyzer` is specified.
151152
==== `wildcard` rule parameters
152153

153154
The `wildcard` rule matches terms using a wildcard pattern. This pattern can
154-
expand to match at most 128 terms. If the pattern matches more than 128 terms,
155+
expand to match at most `indices.query.bool.max_clause_count`
156+
<<search-settings,search setting>> terms. If the pattern matches more terms,
155157
{es} returns an error.
156158

157159
`pattern`::
@@ -184,8 +186,9 @@ The `pattern` is normalized using the search analyzer from this field, unless
184186
==== `regexp` rule parameters
185187

186188
The `regexp` rule matches terms using a regular expression pattern.
187-
This pattern can expand to match at most 128 terms.
188-
If the pattern matches more than 128 terms,{es} returns an error.
189+
This pattern can expand to match at most `indices.query.bool.max_clause_count`
190+
<<search-settings,search setting>> terms.
191+
If the pattern matches more terms, {es} returns an error.
189192

190193
`pattern`::
191194
(Required, string) Regexp pattern used to find matching terms.
@@ -215,7 +218,8 @@ The `pattern` is normalized using the search analyzer from this field, unless
215218

216219
The `fuzzy` rule matches terms that are similar to the provided term, within an
217220
edit distance defined by <<fuzziness>>. If the fuzzy expansion matches more than
218-
128 terms, {es} returns an error.
221+
`indices.query.bool.max_clause_count`
222+
<<search-settings,search setting>> terms, {es} returns an error.
219223

220224
`term`::
221225
(Required, string) The term to match
@@ -250,8 +254,9 @@ The `term` is normalized using the search analyzer from this field, unless
250254
==== `range` rule parameters
251255

252256
The `range` rule matches terms contained within a provided range.
253-
This range can expand to match at most 128 terms.
254-
If the range matches more than 128 terms,{es} returns an error.
257+
This range can expand to match at most `indices.query.bool.max_clause_count`
258+
<<search-settings,search setting>> terms.
259+
If the range matches more terms, {es} returns an error.
255260

256261
`gt`::
257262
(Optional, string) Greater than: match terms greater than the provided term.

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.lucene.queries.intervals.IntervalsSource;
2222
import org.apache.lucene.search.ConstantScoreQuery;
2323
import org.apache.lucene.search.FuzzyQuery;
24+
import org.apache.lucene.search.IndexSearcher;
2425
import org.apache.lucene.search.MatchAllDocsQuery;
2526
import org.apache.lucene.search.MultiTermQuery;
2627
import org.apache.lucene.search.PrefixQuery;
@@ -270,7 +271,11 @@ public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext conte
270271

271272
@Override
272273
public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) {
273-
return toIntervalsSource(Intervals.prefix(term), new PrefixQuery(new Term(name(), term)), context);
274+
return toIntervalsSource(
275+
Intervals.prefix(term, IndexSearcher.getMaxClauseCount()),
276+
new PrefixQuery(new Term(name(), term)),
277+
context
278+
);
274279
}
275280

276281
@Override
@@ -285,18 +290,18 @@ public IntervalsSource fuzzyIntervals(
285290
new Term(name(), term),
286291
maxDistance,
287292
prefixLength,
288-
128,
293+
IndexSearcher.getMaxClauseCount(),
289294
transpositions,
290295
MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE
291296
);
292-
IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), term);
297+
IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), IndexSearcher.getMaxClauseCount(), term);
293298
return toIntervalsSource(fuzzyIntervals, fuzzyQuery, context);
294299
}
295300

296301
@Override
297302
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
298303
return toIntervalsSource(
299-
Intervals.wildcard(pattern),
304+
Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount()),
300305
new MatchAllDocsQuery(), // wildcard queries can be expensive, what should the approximation be?
301306
context
302307
);
@@ -305,7 +310,7 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
305310
@Override
306311
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
307312
return toIntervalsSource(
308-
Intervals.regexp(pattern),
313+
Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount()),
309314
new MatchAllDocsQuery(), // regexp queries can be expensive, what should the approximation be?
310315
context
311316
);
@@ -320,7 +325,7 @@ public IntervalsSource rangeIntervals(
320325
SearchExecutionContext context
321326
) {
322327
return toIntervalsSource(
323-
Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper),
328+
Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount()),
324329
new MatchAllDocsQuery(), // range queries can be expensive, what should the approximation be?
325330
context
326331
);

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.apache.lucene.search.BooleanQuery;
3737
import org.apache.lucene.search.ConstantScoreQuery;
3838
import org.apache.lucene.search.FuzzyQuery;
39+
import org.apache.lucene.search.IndexSearcher;
3940
import org.apache.lucene.search.MultiPhraseQuery;
4041
import org.apache.lucene.search.MultiTermQuery;
4142
import org.apache.lucene.search.PhraseQuery;
@@ -620,7 +621,8 @@ public IntervalsSource intervals(BytesRef term) {
620621
return Intervals.fixField(name(), Intervals.term(term));
621622
}
622623
String wildcardTerm = term.utf8ToString() + "?".repeat(Math.max(0, minChars - term.length));
623-
return Intervals.or(Intervals.fixField(name(), Intervals.wildcard(new BytesRef(wildcardTerm))), Intervals.term(term));
624+
return Intervals.or(Intervals.fixField(name(),
625+
Intervals.wildcard(new BytesRef(wildcardTerm), IndexSearcher.getMaxClauseCount())), Intervals.term(term));
624626
}
625627

626628
@Override
@@ -822,7 +824,7 @@ public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext con
822824
if (prefixFieldType != null) {
823825
return prefixFieldType.intervals(term);
824826
}
825-
return Intervals.prefix(term);
827+
return Intervals.prefix(term, IndexSearcher.getMaxClauseCount());
826828
}
827829

828830
@Override
@@ -836,24 +838,25 @@ public IntervalsSource fuzzyIntervals(
836838
if (getTextSearchInfo().hasPositions() == false) {
837839
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
838840
}
839-
FuzzyQuery fq = new FuzzyQuery(new Term(name(), term), maxDistance, prefixLength, 128, transpositions);
840-
return Intervals.multiterm(fq.getAutomata(), term);
841+
FuzzyQuery fq = new FuzzyQuery(
842+
new Term(name(), term), maxDistance, prefixLength, IndexSearcher.getMaxClauseCount(), transpositions);
843+
return Intervals.multiterm(fq.getAutomata(), IndexSearcher.getMaxClauseCount(), term);
841844
}
842845

843846
@Override
844847
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
845848
if (getTextSearchInfo().hasPositions() == false) {
846849
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
847850
}
848-
return Intervals.wildcard(pattern);
851+
return Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount());
849852
}
850853

851854
@Override
852855
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
853856
if (getTextSearchInfo().hasPositions() == false) {
854857
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
855858
}
856-
return Intervals.regexp(pattern);
859+
return Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount());
857860
}
858861

859862
@Override
@@ -867,7 +870,7 @@ public IntervalsSource rangeIntervals(
867870
if (getTextSearchInfo().hasPositions() == false) {
868871
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
869872
}
870-
return Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper);
873+
return Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount());
871874
}
872875

873876
private void checkForPositions() {

server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,22 @@
99

1010
package org.elasticsearch.index.query;
1111

12+
import org.apache.lucene.analysis.core.KeywordAnalyzer;
13+
import org.apache.lucene.document.Field;
14+
import org.apache.lucene.document.TextField;
15+
import org.apache.lucene.index.IndexReader;
1216
import org.apache.lucene.index.Term;
1317
import org.apache.lucene.queries.intervals.IntervalQuery;
1418
import org.apache.lucene.queries.intervals.Intervals;
1519
import org.apache.lucene.queries.intervals.IntervalsSource;
1620
import org.apache.lucene.search.BoostQuery;
1721
import org.apache.lucene.search.FuzzyQuery;
22+
import org.apache.lucene.search.IndexSearcher;
1823
import org.apache.lucene.search.MatchNoDocsQuery;
1924
import org.apache.lucene.search.Query;
25+
import org.apache.lucene.search.ScoreMode;
26+
import org.apache.lucene.store.Directory;
27+
import org.apache.lucene.tests.index.RandomIndexWriter;
2028
import org.apache.lucene.util.BytesRef;
2129
import org.elasticsearch.common.ParsingException;
2230
import org.elasticsearch.common.Strings;
@@ -34,7 +42,9 @@
3442
import java.util.Collections;
3543
import java.util.List;
3644

45+
import static java.util.Collections.singleton;
3746
import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
47+
import static org.hamcrest.CoreMatchers.containsString;
3848
import static org.hamcrest.Matchers.equalTo;
3949
import static org.hamcrest.Matchers.instanceOf;
4050

@@ -606,7 +616,7 @@ public void testPrefixes() throws IOException {
606616
}
607617
}""", TEXT_FIELD_NAME);
608618
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
609-
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.prefix(new BytesRef("term")));
619+
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.prefix(new BytesRef("term"), IndexSearcher.getMaxClauseCount()));
610620
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
611621

612622
String no_positions_json = Strings.format("""
@@ -667,7 +677,8 @@ public void testPrefixes() throws IOException {
667677
builder = (IntervalQueryBuilder) parseQuery(short_prefix_json);
668678
expected = new IntervalQuery(
669679
PREFIXED_FIELD,
670-
Intervals.or(Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.wildcard(new BytesRef("t?"))), Intervals.term("t"))
680+
Intervals.or(Intervals.fixField(PREFIXED_FIELD + "._index_prefix",
681+
Intervals.wildcard(new BytesRef("t?"), IndexSearcher.getMaxClauseCount())), Intervals.term("t"))
671682
);
672683
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
673684

@@ -739,7 +750,7 @@ public void testRegexp() throws IOException {
739750
}""", TEXT_FIELD_NAME);
740751

741752
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
742-
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.regexp(new BytesRef("te.*m")));
753+
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.regexp(new BytesRef("te.*m"), IndexSearcher.getMaxClauseCount()));
743754
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
744755

745756
String no_positions_json = Strings.format("""
@@ -771,7 +782,8 @@ public void testRegexp() throws IOException {
771782
}""", TEXT_FIELD_NAME);
772783

773784
builder = (IntervalQueryBuilder) parseQuery(fixed_field_json);
774-
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.regexp(new BytesRef("te.*m"))));
785+
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD,
786+
Intervals.regexp(new BytesRef("te.*m"), IndexSearcher.getMaxClauseCount())));
775787
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
776788

777789
String fixed_field_json_no_positions = Strings.format("""
@@ -791,6 +803,38 @@ public void testRegexp() throws IOException {
791803
});
792804
}
793805

806+
public void testMaxExpansionExceptionFailure() throws Exception {
807+
IntervalsSourceProvider provider1 = new IntervalsSourceProvider.Prefix("bar", "keyword", null);
808+
IntervalsSourceProvider provider2 = new IntervalsSourceProvider.Wildcard("bar*", "keyword", null);
809+
IntervalsSourceProvider provider3 = new IntervalsSourceProvider.Fuzzy("bar", 0, true, Fuzziness.fromEdits(1), "keyword",null);
810+
IntervalsSourceProvider provider4 = new IntervalsSourceProvider.Regexp("bar.*", "keyword", null);
811+
IntervalsSourceProvider provider5 = new IntervalsSourceProvider.Range("bar", "bar2", true, true, "keyword", null);
812+
IntervalsSourceProvider provider = randomFrom(provider1, provider2, provider3, provider4, provider5);
813+
814+
try (Directory directory = newDirectory()) {
815+
try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new KeywordAnalyzer())) {
816+
for (int i = 0; i < 3; i++) {
817+
iw.addDocument(singleton(new TextField(TEXT_FIELD_NAME, "bar" + i, Field.Store.NO)));
818+
}
819+
try (IndexReader reader = iw.getReader()) {
820+
int origBoolMaxClauseCount = IndexSearcher.getMaxClauseCount();
821+
IndexSearcher.setMaxClauseCount(1);
822+
try {
823+
824+
IntervalQueryBuilder queryBuilder = new IntervalQueryBuilder(TEXT_FIELD_NAME, provider);
825+
IndexSearcher searcher = newSearcher(reader);
826+
Query query = queryBuilder.toQuery(createSearchExecutionContext(searcher));
827+
RuntimeException exc = expectThrows(RuntimeException.class,
828+
() -> query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f).scorer(searcher.getLeafContexts().get(0)));
829+
assertThat(exc.getMessage(), containsString("expanded to too many terms (limit 1)"));
830+
} finally {
831+
IndexSearcher.setMaxClauseCount(origBoolMaxClauseCount);
832+
}
833+
}
834+
}
835+
}
836+
}
837+
794838
public void testWildcard() throws IOException {
795839
String json = Strings.format("""
796840
{
@@ -804,7 +848,7 @@ public void testWildcard() throws IOException {
804848
}""", TEXT_FIELD_NAME);
805849

806850
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
807-
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("te?m")));
851+
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("te?m"), IndexSearcher.getMaxClauseCount()));
808852
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
809853

810854
String no_positions_json = Strings.format("""
@@ -836,7 +880,7 @@ public void testWildcard() throws IOException {
836880
}""", TEXT_FIELD_NAME);
837881

838882
builder = (IntervalQueryBuilder) parseQuery(keyword_json);
839-
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("Te?m")));
883+
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("Te?m"), IndexSearcher.getMaxClauseCount()));
840884
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
841885

842886
String fixed_field_json = Strings.format("""
@@ -852,7 +896,8 @@ public void testWildcard() throws IOException {
852896
}""", TEXT_FIELD_NAME);
853897

854898
builder = (IntervalQueryBuilder) parseQuery(fixed_field_json);
855-
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.wildcard(new BytesRef("te?m"))));
899+
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD,
900+
Intervals.wildcard(new BytesRef("te?m"), IndexSearcher.getMaxClauseCount())));
856901
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
857902

858903
String fixed_field_json_no_positions = Strings.format("""
@@ -885,13 +930,15 @@ public void testWildcard() throws IOException {
885930
}""", TEXT_FIELD_NAME);
886931

887932
builder = (IntervalQueryBuilder) parseQuery(fixed_field_analyzer_json);
888-
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.wildcard(new BytesRef("Te?m"))));
933+
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD,
934+
Intervals.wildcard(new BytesRef("Te?m"), IndexSearcher.getMaxClauseCount())));
889935
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
890936
}
891937

892938
private static IntervalsSource buildFuzzySource(String term, String label, int prefixLength, boolean transpositions, int editDistance) {
893-
FuzzyQuery fq = new FuzzyQuery(new Term("field", term), editDistance, prefixLength, 128, transpositions);
894-
return Intervals.multiterm(fq.getAutomata(), label);
939+
FuzzyQuery fq =
940+
new FuzzyQuery(new Term("field", term), editDistance, prefixLength, IndexSearcher.getMaxClauseCount(), transpositions);
941+
return Intervals.multiterm(fq.getAutomata(), IndexSearcher.getMaxClauseCount(), label);
895942
}
896943

897944
public void testFuzzy() throws IOException {
@@ -1011,7 +1058,8 @@ public void testRange() throws IOException {
10111058
}
10121059
}""", TEXT_FIELD_NAME);
10131060
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
1014-
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.range(new BytesRef("aaa"), new BytesRef("aab"), true, true));
1061+
Query expected = new IntervalQuery(TEXT_FIELD_NAME,
1062+
Intervals.range(new BytesRef("aaa"), new BytesRef("aab"), true, true, IndexSearcher.getMaxClauseCount()));
10151063
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
10161064

10171065
json = Strings.format("""
@@ -1026,7 +1074,8 @@ public void testRange() throws IOException {
10261074
}
10271075
}""", TEXT_FIELD_NAME);
10281076
builder = (IntervalQueryBuilder) parseQuery(json);
1029-
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.range(new BytesRef("aaa"), new BytesRef("aab"), false, false));
1077+
expected = new IntervalQuery(TEXT_FIELD_NAME,
1078+
Intervals.range(new BytesRef("aaa"), new BytesRef("aab"), false, false, IndexSearcher.getMaxClauseCount()));
10301079
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
10311080

10321081
String incomplete_range = Strings.format("""

0 commit comments

Comments
 (0)