Skip to content

Commit 8535e4c

Browse files
Multi term intervals: increase max_expansions
Currently, multi-term interval queries (prefix, wildcard, fuzzy, regexp and range) can expand to a maximum of 128 terms. To reach parity with the span queries that we want to deprecate, set max expansions to indices.query.bool.max_clause_count, which is the limit used in span queries. Relates to #110491
1 parent d45dbc7 commit 8535e4c

File tree

4 files changed

+92
-33
lines changed

4 files changed

+92
-33
lines changed

docs/reference/query-dsl/intervals-query.asciidoc

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,9 @@ unstemmed ones.
124124
==== `prefix` rule parameters
125125

126126
The `prefix` rule matches terms that start with a specified set of characters.
127-
This prefix can expand to match at most 128 terms. If the prefix matches more
128-
than 128 terms, {es} returns an error. You can use the
127+
This prefix can expand to match at most `indices.query.bool.max_clause_count`
128+
<<search-settings,search setting>> terms. If the prefix matches more terms,
129+
{es} returns an error. You can use the
129130
<<index-prefixes,`index-prefixes`>> option in the field mapping to avoid this
130131
limit.
131132

@@ -151,7 +152,8 @@ separate `analyzer` is specified.
151152
==== `wildcard` rule parameters
152153

153154
The `wildcard` rule matches terms using a wildcard pattern. This pattern can
154-
expand to match at most 128 terms. If the pattern matches more than 128 terms,
155+
expand to match at most `indices.query.bool.max_clause_count`
156+
<<search-settings,search setting>> terms. If the pattern matches more terms,
155157
{es} returns an error.
156158

157159
`pattern`::
@@ -184,8 +186,9 @@ The `pattern` is normalized using the search analyzer from this field, unless
184186
==== `regexp` rule parameters
185187

186188
The `regexp` rule matches terms using a regular expression pattern.
187-
This pattern can expand to match at most 128 terms.
188-
If the pattern matches more than 128 terms,{es} returns an error.
189+
This pattern can expand to match at most `indices.query.bool.max_clause_count`
190+
<<search-settings,search setting>> terms.
191+
If the pattern matches more terms, {es} returns an error.
189192

190193
`pattern`::
191194
(Required, string) Regexp pattern used to find matching terms.
@@ -215,7 +218,8 @@ The `pattern` is normalized using the search analyzer from this field, unless
215218

216219
The `fuzzy` rule matches terms that are similar to the provided term, within an
217220
edit distance defined by <<fuzziness>>. If the fuzzy expansion matches more than
218-
128 terms, {es} returns an error.
221+
`indices.query.bool.max_clause_count`
222+
<<search-settings,search setting>> terms, {es} returns an error.
219223

220224
`term`::
221225
(Required, string) The term to match
@@ -250,8 +254,9 @@ The `term` is normalized using the search analyzer from this field, unless
250254
==== `range` rule parameters
251255

252256
The `range` rule matches terms contained within a provided range.
253-
This range can expand to match at most 128 terms.
254-
If the range matches more than 128 terms,{es} returns an error.
257+
This range can expand to match at most `indices.query.bool.max_clause_count`
258+
<<search-settings,search setting>> terms.
259+
If the range matches more terms, {es} returns an error.
255260

256261
`gt`::
257262
(Optional, string) Greater than: match terms greater than the provided term.

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.lucene.queries.intervals.IntervalsSource;
2121
import org.apache.lucene.search.ConstantScoreQuery;
2222
import org.apache.lucene.search.FuzzyQuery;
23+
import org.apache.lucene.search.IndexSearcher;
2324
import org.apache.lucene.search.MatchAllDocsQuery;
2425
import org.apache.lucene.search.MultiTermQuery;
2526
import org.apache.lucene.search.PrefixQuery;
@@ -272,7 +273,8 @@ public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext conte
272273

273274
@Override
274275
public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) {
275-
return toIntervalsSource(Intervals.prefix(term), new PrefixQuery(new Term(name(), term)), context);
276+
return toIntervalsSource(
277+
Intervals.prefix(term, IndexSearcher.getMaxClauseCount()), new PrefixQuery(new Term(name(), term)), context);
276278
}
277279

278280
@Override
@@ -287,18 +289,18 @@ public IntervalsSource fuzzyIntervals(
287289
new Term(name(), term),
288290
maxDistance,
289291
prefixLength,
290-
128,
292+
IndexSearcher.getMaxClauseCount(),
291293
transpositions,
292294
MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE
293295
);
294-
IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), term);
296+
IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), IndexSearcher.getMaxClauseCount(), term);
295297
return toIntervalsSource(fuzzyIntervals, fuzzyQuery, context);
296298
}
297299

298300
@Override
299301
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
300302
return toIntervalsSource(
301-
Intervals.wildcard(pattern),
303+
Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount()),
302304
new MatchAllDocsQuery(), // wildcard queries can be expensive, what should the approximation be?
303305
context
304306
);
@@ -307,7 +309,7 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
307309
@Override
308310
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
309311
return toIntervalsSource(
310-
Intervals.regexp(pattern),
312+
Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount()),
311313
new MatchAllDocsQuery(), // regexp queries can be expensive, what should the approximation be?
312314
context
313315
);
@@ -322,7 +324,7 @@ public IntervalsSource rangeIntervals(
322324
SearchExecutionContext context
323325
) {
324326
return toIntervalsSource(
325-
Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper),
327+
Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount()),
326328
new MatchAllDocsQuery(), // range queries can be expensive, what should the approximation be?
327329
context
328330
);

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.apache.lucene.search.BooleanQuery;
3636
import org.apache.lucene.search.ConstantScoreQuery;
3737
import org.apache.lucene.search.FuzzyQuery;
38+
import org.apache.lucene.search.IndexSearcher;
3839
import org.apache.lucene.search.MultiPhraseQuery;
3940
import org.apache.lucene.search.MultiTermQuery;
4041
import org.apache.lucene.search.PhraseQuery;
@@ -620,7 +621,8 @@ public IntervalsSource intervals(BytesRef term) {
620621
return Intervals.fixField(name(), Intervals.term(term));
621622
}
622623
String wildcardTerm = term.utf8ToString() + "?".repeat(Math.max(0, minChars - term.length));
623-
return Intervals.or(Intervals.fixField(name(), Intervals.wildcard(new BytesRef(wildcardTerm))), Intervals.term(term));
624+
return Intervals.or(Intervals.fixField(name(),
625+
Intervals.wildcard(new BytesRef(wildcardTerm), IndexSearcher.getMaxClauseCount())), Intervals.term(term));
624626
}
625627

626628
@Override
@@ -822,7 +824,7 @@ public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext con
822824
if (prefixFieldType != null) {
823825
return prefixFieldType.intervals(term);
824826
}
825-
return Intervals.prefix(term);
827+
return Intervals.prefix(term, IndexSearcher.getMaxClauseCount());
826828
}
827829

828830
@Override
@@ -836,24 +838,25 @@ public IntervalsSource fuzzyIntervals(
836838
if (getTextSearchInfo().hasPositions() == false) {
837839
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
838840
}
839-
FuzzyQuery fq = new FuzzyQuery(new Term(name(), term), maxDistance, prefixLength, 128, transpositions);
840-
return Intervals.multiterm(fq.getAutomata(), term);
841+
FuzzyQuery fq = new FuzzyQuery(
842+
new Term(name(), term), maxDistance, prefixLength, IndexSearcher.getMaxClauseCount(), transpositions);
843+
return Intervals.multiterm(fq.getAutomata(), IndexSearcher.getMaxClauseCount(), term);
841844
}
842845

843846
@Override
844847
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
845848
if (getTextSearchInfo().hasPositions() == false) {
846849
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
847850
}
848-
return Intervals.wildcard(pattern);
851+
return Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount());
849852
}
850853

851854
@Override
852855
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
853856
if (getTextSearchInfo().hasPositions() == false) {
854857
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
855858
}
856-
return Intervals.regexp(pattern);
859+
return Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount());
857860
}
858861

859862
@Override
@@ -867,7 +870,7 @@ public IntervalsSource rangeIntervals(
867870
if (getTextSearchInfo().hasPositions() == false) {
868871
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
869872
}
870-
return Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper);
873+
return Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount());
871874
}
872875

873876
private void checkForPositions() {

server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,22 @@
88

99
package org.elasticsearch.index.query;
1010

11+
import org.apache.lucene.analysis.core.KeywordAnalyzer;
12+
import org.apache.lucene.document.Field;
13+
import org.apache.lucene.document.TextField;
14+
import org.apache.lucene.index.IndexReader;
1115
import org.apache.lucene.index.Term;
1216
import org.apache.lucene.queries.intervals.IntervalQuery;
1317
import org.apache.lucene.queries.intervals.Intervals;
1418
import org.apache.lucene.queries.intervals.IntervalsSource;
1519
import org.apache.lucene.search.BoostQuery;
1620
import org.apache.lucene.search.FuzzyQuery;
21+
import org.apache.lucene.search.IndexSearcher;
1722
import org.apache.lucene.search.MatchNoDocsQuery;
1823
import org.apache.lucene.search.Query;
24+
import org.apache.lucene.search.ScoreMode;
25+
import org.apache.lucene.store.Directory;
26+
import org.apache.lucene.tests.index.RandomIndexWriter;
1927
import org.apache.lucene.util.BytesRef;
2028
import org.elasticsearch.common.ParsingException;
2129
import org.elasticsearch.common.Strings;
@@ -33,7 +41,9 @@
3341
import java.util.Collections;
3442
import java.util.List;
3543

44+
import static java.util.Collections.singleton;
3645
import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
46+
import static org.hamcrest.CoreMatchers.containsString;
3747
import static org.hamcrest.Matchers.equalTo;
3848
import static org.hamcrest.Matchers.instanceOf;
3949

@@ -605,7 +615,7 @@ public void testPrefixes() throws IOException {
605615
}
606616
}""", TEXT_FIELD_NAME);
607617
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
608-
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.prefix(new BytesRef("term")));
618+
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.prefix(new BytesRef("term"), IndexSearcher.getMaxClauseCount()));
609619
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
610620

611621
String no_positions_json = Strings.format("""
@@ -666,7 +676,8 @@ public void testPrefixes() throws IOException {
666676
builder = (IntervalQueryBuilder) parseQuery(short_prefix_json);
667677
expected = new IntervalQuery(
668678
PREFIXED_FIELD,
669-
Intervals.or(Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.wildcard(new BytesRef("t?"))), Intervals.term("t"))
679+
Intervals.or(Intervals.fixField(PREFIXED_FIELD + "._index_prefix",
680+
Intervals.wildcard(new BytesRef("t?"), IndexSearcher.getMaxClauseCount())), Intervals.term("t"))
670681
);
671682
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
672683

@@ -738,7 +749,7 @@ public void testRegexp() throws IOException {
738749
}""", TEXT_FIELD_NAME);
739750

740751
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
741-
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.regexp(new BytesRef("te.*m")));
752+
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.regexp(new BytesRef("te.*m"), IndexSearcher.getMaxClauseCount()));
742753
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
743754

744755
String no_positions_json = Strings.format("""
@@ -770,7 +781,8 @@ public void testRegexp() throws IOException {
770781
}""", TEXT_FIELD_NAME);
771782

772783
builder = (IntervalQueryBuilder) parseQuery(fixed_field_json);
773-
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.regexp(new BytesRef("te.*m"))));
784+
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD,
785+
Intervals.regexp(new BytesRef("te.*m"), IndexSearcher.getMaxClauseCount())));
774786
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
775787

776788
String fixed_field_json_no_positions = Strings.format("""
@@ -790,6 +802,38 @@ public void testRegexp() throws IOException {
790802
});
791803
}
792804

805+
public void testMaxExpansionExceptionFailure() throws Exception {
806+
IntervalsSourceProvider provider1 = new IntervalsSourceProvider.Prefix("bar", "keyword", null);
807+
IntervalsSourceProvider provider2 = new IntervalsSourceProvider.Wildcard("bar*", "keyword", null);
808+
IntervalsSourceProvider provider3 = new IntervalsSourceProvider.Fuzzy("bar", 0, true, Fuzziness.fromEdits(1), "keyword",null);
809+
IntervalsSourceProvider provider4 = new IntervalsSourceProvider.Regexp("bar.*", "keyword", null);
810+
IntervalsSourceProvider provider5 = new IntervalsSourceProvider.Range("bar", "bar2", true, true, "keyword", null);
811+
IntervalsSourceProvider provider = randomFrom(provider1, provider2, provider3, provider4, provider5);
812+
813+
try (Directory directory = newDirectory()) {
814+
try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new KeywordAnalyzer())) {
815+
for (int i = 0; i < 3; i++) {
816+
iw.addDocument(singleton(new TextField(TEXT_FIELD_NAME, "bar" + i, Field.Store.NO)));
817+
}
818+
try (IndexReader reader = iw.getReader()) {
819+
int origBoolMaxClauseCount = IndexSearcher.getMaxClauseCount();
820+
IndexSearcher.setMaxClauseCount(1);
821+
try {
822+
823+
IntervalQueryBuilder queryBuilder = new IntervalQueryBuilder(TEXT_FIELD_NAME, provider);
824+
IndexSearcher searcher = newSearcher(reader);
825+
Query query = queryBuilder.toQuery(createSearchExecutionContext(searcher));
826+
RuntimeException exc = expectThrows(RuntimeException.class,
827+
() -> query.createWeight(searcher, ScoreMode.COMPLETE, 1.0f).scorer(searcher.getLeafContexts().get(0)));
828+
assertThat(exc.getMessage(), containsString("expanded to too many terms (limit 1)"));
829+
} finally {
830+
IndexSearcher.setMaxClauseCount(origBoolMaxClauseCount);
831+
}
832+
}
833+
}
834+
}
835+
}
836+
793837
public void testWildcard() throws IOException {
794838
String json = Strings.format("""
795839
{
@@ -803,7 +847,7 @@ public void testWildcard() throws IOException {
803847
}""", TEXT_FIELD_NAME);
804848

805849
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
806-
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("te?m")));
850+
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("te?m"), IndexSearcher.getMaxClauseCount()));
807851
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
808852

809853
String no_positions_json = Strings.format("""
@@ -835,7 +879,7 @@ public void testWildcard() throws IOException {
835879
}""", TEXT_FIELD_NAME);
836880

837881
builder = (IntervalQueryBuilder) parseQuery(keyword_json);
838-
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("Te?m")));
882+
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("Te?m"), IndexSearcher.getMaxClauseCount()));
839883
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
840884

841885
String fixed_field_json = Strings.format("""
@@ -851,7 +895,8 @@ public void testWildcard() throws IOException {
851895
}""", TEXT_FIELD_NAME);
852896

853897
builder = (IntervalQueryBuilder) parseQuery(fixed_field_json);
854-
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.wildcard(new BytesRef("te?m"))));
898+
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD,
899+
Intervals.wildcard(new BytesRef("te?m"), IndexSearcher.getMaxClauseCount())));
855900
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
856901

857902
String fixed_field_json_no_positions = Strings.format("""
@@ -884,13 +929,15 @@ public void testWildcard() throws IOException {
884929
}""", TEXT_FIELD_NAME);
885930

886931
builder = (IntervalQueryBuilder) parseQuery(fixed_field_analyzer_json);
887-
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.wildcard(new BytesRef("Te?m"))));
932+
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD,
933+
Intervals.wildcard(new BytesRef("Te?m"), IndexSearcher.getMaxClauseCount())));
888934
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
889935
}
890936

891937
private static IntervalsSource buildFuzzySource(String term, String label, int prefixLength, boolean transpositions, int editDistance) {
892-
FuzzyQuery fq = new FuzzyQuery(new Term("field", term), editDistance, prefixLength, 128, transpositions);
893-
return Intervals.multiterm(fq.getAutomata(), label);
938+
FuzzyQuery fq =
939+
new FuzzyQuery(new Term("field", term), editDistance, prefixLength, IndexSearcher.getMaxClauseCount(), transpositions);
940+
return Intervals.multiterm(fq.getAutomata(), IndexSearcher.getMaxClauseCount(), label);
894941
}
895942

896943
public void testFuzzy() throws IOException {
@@ -1010,7 +1057,8 @@ public void testRange() throws IOException {
10101057
}
10111058
}""", TEXT_FIELD_NAME);
10121059
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
1013-
Query expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.range(new BytesRef("aaa"), new BytesRef("aab"), true, true));
1060+
Query expected = new IntervalQuery(TEXT_FIELD_NAME,
1061+
Intervals.range(new BytesRef("aaa"), new BytesRef("aab"), true, true, IndexSearcher.getMaxClauseCount()));
10141062
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
10151063

10161064
json = Strings.format("""
@@ -1025,7 +1073,8 @@ public void testRange() throws IOException {
10251073
}
10261074
}""", TEXT_FIELD_NAME);
10271075
builder = (IntervalQueryBuilder) parseQuery(json);
1028-
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.range(new BytesRef("aaa"), new BytesRef("aab"), false, false));
1076+
expected = new IntervalQuery(TEXT_FIELD_NAME,
1077+
Intervals.range(new BytesRef("aaa"), new BytesRef("aab"), false, false, IndexSearcher.getMaxClauseCount()));
10291078
assertEquals(expected, builder.toQuery(createSearchExecutionContext()));
10301079

10311080
String incomplete_range = Strings.format("""

0 commit comments

Comments
 (0)