Skip to content

Commit 7150729

Browse files
Multi term intervals: increase max_expansions (#112826)
Currently, multi-term interval queries (prefix, wildcard, fuzzy, regexp and range) can expand to at most 128 terms. To reach parity with span queries, which we want to deprecate, set the maximum number of expansions to indices.query.bool.max_clause_count, the same limit that span queries use. Relates to #110491
1 parent 9eec2c4 commit 7150729

File tree

8 files changed

+205
-49
lines changed

8 files changed

+205
-49
lines changed

docs/changelog/112826.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 112826
2+
summary: "Multi term intervals: increase max_expansions"
3+
area: Search
4+
type: enhancement
5+
issues:
6+
- 110491

docs/reference/query-dsl/intervals-query.asciidoc

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,9 @@ unstemmed ones.
124124
==== `prefix` rule parameters
125125

126126
The `prefix` rule matches terms that start with a specified set of characters.
127-
This prefix can expand to match at most 128 terms. If the prefix matches more
128-
than 128 terms, {es} returns an error. You can use the
127+
This prefix can expand to match at most the number of terms allowed by the `indices.query.bool.max_clause_count`
128+
<<search-settings,search setting>>. If the prefix matches more terms,
129+
{es} returns an error. You can use the
129130
<<index-prefixes,`index-prefixes`>> option in the field mapping to avoid this
130131
limit.
131132

@@ -151,7 +152,8 @@ separate `analyzer` is specified.
151152
==== `wildcard` rule parameters
152153

153154
The `wildcard` rule matches terms using a wildcard pattern. This pattern can
154-
expand to match at most 128 terms. If the pattern matches more than 128 terms,
155+
expand to match at most the number of terms allowed by the `indices.query.bool.max_clause_count`
156+
<<search-settings,search setting>>. If the pattern matches more terms,
155157
{es} returns an error.
156158

157159
`pattern`::
@@ -184,8 +186,9 @@ The `pattern` is normalized using the search analyzer from this field, unless
184186
==== `regexp` rule parameters
185187

186188
The `regexp` rule matches terms using a regular expression pattern.
187-
This pattern can expand to match at most 128 terms.
188-
If the pattern matches more than 128 terms,{es} returns an error.
189+
This pattern can expand to match at most the number of terms allowed by the `indices.query.bool.max_clause_count`
190+
<<search-settings,search setting>>.
191+
If the pattern matches more terms, {es} returns an error.
189192

190193
`pattern`::
191194
(Required, string) Regexp pattern used to find matching terms.
@@ -215,7 +218,8 @@ The `pattern` is normalized using the search analyzer from this field, unless
215218

216219
The `fuzzy` rule matches terms that are similar to the provided term, within an
217220
edit distance defined by <<fuzziness>>. If the fuzzy expansion matches more than
218-
128 terms, {es} returns an error.
221+
the number of terms allowed by the `indices.query.bool.max_clause_count`
222+
<<search-settings,search setting>>, {es} returns an error.
219223

220224
`term`::
221225
(Required, string) The term to match
@@ -250,8 +254,9 @@ The `term` is normalized using the search analyzer from this field, unless
250254
==== `range` rule parameters
251255

252256
The `range` rule matches terms contained within a provided range.
253-
This range can expand to match at most 128 terms.
254-
If the range matches more than 128 terms,{es} returns an error.
257+
This range can expand to match at most the number of terms allowed by the `indices.query.bool.max_clause_count`
258+
<<search-settings,search setting>>.
259+
If the range matches more terms, {es} returns an error.
255260

256261
`gt`::
257262
(Optional, string) Greater than: match terms greater than the provided term.

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.lucene.queries.intervals.IntervalsSource;
2222
import org.apache.lucene.search.ConstantScoreQuery;
2323
import org.apache.lucene.search.FuzzyQuery;
24+
import org.apache.lucene.search.IndexSearcher;
2425
import org.apache.lucene.search.MatchAllDocsQuery;
2526
import org.apache.lucene.search.MultiTermQuery;
2627
import org.apache.lucene.search.PrefixQuery;
@@ -270,7 +271,11 @@ public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext conte
270271

271272
@Override
272273
public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) {
273-
return toIntervalsSource(Intervals.prefix(term), new PrefixQuery(new Term(name(), term)), context);
274+
return toIntervalsSource(
275+
Intervals.prefix(term, IndexSearcher.getMaxClauseCount()),
276+
new PrefixQuery(new Term(name(), term)),
277+
context
278+
);
274279
}
275280

276281
@Override
@@ -285,18 +290,18 @@ public IntervalsSource fuzzyIntervals(
285290
new Term(name(), term),
286291
maxDistance,
287292
prefixLength,
288-
128,
293+
IndexSearcher.getMaxClauseCount(),
289294
transpositions,
290295
MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE
291296
);
292-
IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), term);
297+
IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), IndexSearcher.getMaxClauseCount(), term);
293298
return toIntervalsSource(fuzzyIntervals, fuzzyQuery, context);
294299
}
295300

296301
@Override
297302
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
298303
return toIntervalsSource(
299-
Intervals.wildcard(pattern),
304+
Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount()),
300305
new MatchAllDocsQuery(), // wildcard queries can be expensive, what should the approximation be?
301306
context
302307
);
@@ -305,7 +310,7 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
305310
@Override
306311
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
307312
return toIntervalsSource(
308-
Intervals.regexp(pattern),
313+
Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount()),
309314
new MatchAllDocsQuery(), // regexp queries can be expensive, what should the approximation be?
310315
context
311316
);
@@ -320,7 +325,7 @@ public IntervalsSource rangeIntervals(
320325
SearchExecutionContext context
321326
) {
322327
return toIntervalsSource(
323-
Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper),
328+
Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount()),
324329
new MatchAllDocsQuery(), // range queries can be expensive, what should the approximation be?
325330
context
326331
);

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.apache.lucene.queries.intervals.IntervalsSource;
1515
import org.apache.lucene.search.ConstantScoreQuery;
1616
import org.apache.lucene.search.FuzzyQuery;
17+
import org.apache.lucene.search.IndexSearcher;
1718
import org.apache.lucene.search.MatchAllDocsQuery;
1819
import org.apache.lucene.search.MultiPhraseQuery;
1920
import org.apache.lucene.search.PhraseQuery;
@@ -152,30 +153,56 @@ public void testPhrasePrefixQuery() throws IOException {
152153
assertNotEquals(new MatchAllDocsQuery(), SourceConfirmedTextQuery.approximate(delegate));
153154
}
154155

155-
public void testTermIntervals() throws IOException {
156+
public void testTermIntervals() {
156157
MappedFieldType ft = new MatchOnlyTextFieldType("field");
157158
IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT);
158159
assertThat(termIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
159160
assertEquals(Intervals.term(new BytesRef("foo")), ((SourceIntervalsSource) termIntervals).getIntervalsSource());
160161
}
161162

162-
public void testPrefixIntervals() throws IOException {
163+
public void testPrefixIntervals() {
163164
MappedFieldType ft = new MatchOnlyTextFieldType("field");
164165
IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT);
165166
assertThat(prefixIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
166-
assertEquals(Intervals.prefix(new BytesRef("foo")), ((SourceIntervalsSource) prefixIntervals).getIntervalsSource());
167+
assertEquals(
168+
Intervals.prefix(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
169+
((SourceIntervalsSource) prefixIntervals).getIntervalsSource()
170+
);
167171
}
168172

169-
public void testWildcardIntervals() throws IOException {
173+
public void testWildcardIntervals() {
170174
MappedFieldType ft = new MatchOnlyTextFieldType("field");
171175
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
172176
assertThat(wildcardIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
173-
assertEquals(Intervals.wildcard(new BytesRef("foo")), ((SourceIntervalsSource) wildcardIntervals).getIntervalsSource());
177+
assertEquals(
178+
Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
179+
((SourceIntervalsSource) wildcardIntervals).getIntervalsSource()
180+
);
181+
}
182+
183+
public void testRegexpIntervals() {
184+
MappedFieldType ft = new MatchOnlyTextFieldType("field");
185+
IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT);
186+
assertThat(regexpIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
187+
assertEquals(
188+
Intervals.regexp(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
189+
((SourceIntervalsSource) regexpIntervals).getIntervalsSource()
190+
);
174191
}
175192

176-
public void testFuzzyIntervals() throws IOException {
193+
public void testFuzzyIntervals() {
177194
MappedFieldType ft = new MatchOnlyTextFieldType("field");
178195
IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT);
179196
assertThat(fuzzyIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
180197
}
198+
199+
public void testRangeIntervals() {
200+
MappedFieldType ft = new MatchOnlyTextFieldType("field");
201+
IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT);
202+
assertThat(rangeIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
203+
assertEquals(
204+
Intervals.range(new BytesRef("foo"), new BytesRef("foo1"), true, true, IndexSearcher.getMaxClauseCount()),
205+
((SourceIntervalsSource) rangeIntervals).getIntervalsSource()
206+
);
207+
}
181208
}

server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.apache.lucene.search.BooleanQuery;
3737
import org.apache.lucene.search.ConstantScoreQuery;
3838
import org.apache.lucene.search.FuzzyQuery;
39+
import org.apache.lucene.search.IndexSearcher;
3940
import org.apache.lucene.search.MultiPhraseQuery;
4041
import org.apache.lucene.search.MultiTermQuery;
4142
import org.apache.lucene.search.PhraseQuery;
@@ -620,7 +621,10 @@ public IntervalsSource intervals(BytesRef term) {
620621
return Intervals.fixField(name(), Intervals.term(term));
621622
}
622623
String wildcardTerm = term.utf8ToString() + "?".repeat(Math.max(0, minChars - term.length));
623-
return Intervals.or(Intervals.fixField(name(), Intervals.wildcard(new BytesRef(wildcardTerm))), Intervals.term(term));
624+
return Intervals.or(
625+
Intervals.fixField(name(), Intervals.wildcard(new BytesRef(wildcardTerm), IndexSearcher.getMaxClauseCount())),
626+
Intervals.term(term)
627+
);
624628
}
625629

626630
@Override
@@ -822,7 +826,7 @@ public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext con
822826
if (prefixFieldType != null) {
823827
return prefixFieldType.intervals(term);
824828
}
825-
return Intervals.prefix(term);
829+
return Intervals.prefix(term, IndexSearcher.getMaxClauseCount());
826830
}
827831

828832
@Override
@@ -836,24 +840,30 @@ public IntervalsSource fuzzyIntervals(
836840
if (getTextSearchInfo().hasPositions() == false) {
837841
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
838842
}
839-
FuzzyQuery fq = new FuzzyQuery(new Term(name(), term), maxDistance, prefixLength, 128, transpositions);
840-
return Intervals.multiterm(fq.getAutomata(), term);
843+
FuzzyQuery fq = new FuzzyQuery(
844+
new Term(name(), term),
845+
maxDistance,
846+
prefixLength,
847+
IndexSearcher.getMaxClauseCount(),
848+
transpositions
849+
);
850+
return Intervals.multiterm(fq.getAutomata(), IndexSearcher.getMaxClauseCount(), term);
841851
}
842852

843853
@Override
844854
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
845855
if (getTextSearchInfo().hasPositions() == false) {
846856
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
847857
}
848-
return Intervals.wildcard(pattern);
858+
return Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount());
849859
}
850860

851861
@Override
852862
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
853863
if (getTextSearchInfo().hasPositions() == false) {
854864
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
855865
}
856-
return Intervals.regexp(pattern);
866+
return Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount());
857867
}
858868

859869
@Override
@@ -867,7 +877,7 @@ public IntervalsSource rangeIntervals(
867877
if (getTextSearchInfo().hasPositions() == false) {
868878
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
869879
}
870-
return Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper);
880+
return Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount());
871881
}
872882

873883
private void checkForPositions() {

server/src/test/java/org/elasticsearch/index/mapper/ConstantScoreTextFieldTypeTests.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.apache.lucene.search.BooleanQuery;
1717
import org.apache.lucene.search.ConstantScoreQuery;
1818
import org.apache.lucene.search.FuzzyQuery;
19+
import org.apache.lucene.search.IndexSearcher;
1920
import org.apache.lucene.search.PrefixQuery;
2021
import org.apache.lucene.search.Query;
2122
import org.apache.lucene.search.RegexpQuery;
@@ -231,20 +232,26 @@ public void testTermIntervals() throws IOException {
231232
public void testPrefixIntervals() throws IOException {
232233
MappedFieldType ft = createFieldType();
233234
IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT);
234-
assertEquals(Intervals.prefix(new BytesRef("foo")), prefixIntervals);
235+
assertEquals(Intervals.prefix(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), prefixIntervals);
235236
}
236237

237238
public void testWildcardIntervals() throws IOException {
238239
MappedFieldType ft = createFieldType();
239240
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
240-
assertEquals(Intervals.wildcard(new BytesRef("foo")), wildcardIntervals);
241+
assertEquals(Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), wildcardIntervals);
242+
}
243+
244+
public void testRegexpIntervals() {
245+
MappedFieldType ft = createFieldType();
246+
IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT);
247+
assertEquals(Intervals.regexp(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), regexpIntervals);
241248
}
242249

243250
public void testFuzzyIntervals() throws IOException {
244251
MappedFieldType ft = createFieldType();
245252
IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT);
246253
FuzzyQuery fq = new FuzzyQuery(new Term("field", "foo"), 1, 2, 128, true);
247-
IntervalsSource expectedIntervals = Intervals.multiterm(fq.getAutomata(), "foo");
254+
IntervalsSource expectedIntervals = Intervals.multiterm(fq.getAutomata(), IndexSearcher.getMaxClauseCount(), "foo");
248255
assertEquals(expectedIntervals, fuzzyIntervals);
249256
}
250257

@@ -259,6 +266,15 @@ public void testWildcardIntervalsWithIndexedPrefixes() {
259266
ConstantScoreTextFieldType ft = createFieldType();
260267
ft.setIndexPrefixes(1, 4);
261268
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
262-
assertEquals(Intervals.wildcard(new BytesRef("foo")), wildcardIntervals);
269+
assertEquals(Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), wildcardIntervals);
270+
}
271+
272+
public void testRangeIntervals() {
273+
MappedFieldType ft = createFieldType();
274+
IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT);
275+
assertEquals(
276+
Intervals.range(new BytesRef("foo"), new BytesRef("foo1"), true, true, IndexSearcher.getMaxClauseCount()),
277+
rangeIntervals
278+
);
263279
}
264280
}

server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.apache.lucene.search.BooleanQuery;
1717
import org.apache.lucene.search.ConstantScoreQuery;
1818
import org.apache.lucene.search.FuzzyQuery;
19+
import org.apache.lucene.search.IndexSearcher;
1920
import org.apache.lucene.search.MultiTermQuery;
2021
import org.apache.lucene.search.PrefixQuery;
2122
import org.apache.lucene.search.Query;
@@ -243,20 +244,26 @@ public void testTermIntervals() throws IOException {
243244
public void testPrefixIntervals() throws IOException {
244245
MappedFieldType ft = createFieldType();
245246
IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT);
246-
assertEquals(Intervals.prefix(new BytesRef("foo")), prefixIntervals);
247+
assertEquals(Intervals.prefix(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), prefixIntervals);
247248
}
248249

249-
public void testWildcardIntervals() throws IOException {
250+
public void testWildcardIntervals() {
250251
MappedFieldType ft = createFieldType();
251252
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
252-
assertEquals(Intervals.wildcard(new BytesRef("foo")), wildcardIntervals);
253+
assertEquals(Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), wildcardIntervals);
253254
}
254255

255-
public void testFuzzyIntervals() throws IOException {
256+
public void testRegexpIntervals() {
257+
MappedFieldType ft = createFieldType();
258+
IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT);
259+
assertEquals(Intervals.regexp(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), regexpIntervals);
260+
}
261+
262+
public void testFuzzyIntervals() {
256263
MappedFieldType ft = createFieldType();
257264
IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT);
258265
FuzzyQuery fq = new FuzzyQuery(new Term("field", "foo"), 1, 2, 128, true);
259-
IntervalsSource expectedIntervals = Intervals.multiterm(fq.getAutomata(), "foo");
266+
IntervalsSource expectedIntervals = Intervals.multiterm(fq.getAutomata(), IndexSearcher.getMaxClauseCount(), "foo");
260267
assertEquals(expectedIntervals, fuzzyIntervals);
261268
}
262269

@@ -271,6 +278,15 @@ public void testWildcardIntervalsWithIndexedPrefixes() {
271278
TextFieldType ft = createFieldType();
272279
ft.setIndexPrefixes(1, 4);
273280
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
274-
assertEquals(Intervals.wildcard(new BytesRef("foo")), wildcardIntervals);
281+
assertEquals(Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), wildcardIntervals);
282+
}
283+
284+
public void testRangeIntervals() {
285+
MappedFieldType ft = createFieldType();
286+
IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT);
287+
assertEquals(
288+
Intervals.range(new BytesRef("foo"), new BytesRef("foo1"), true, true, IndexSearcher.getMaxClauseCount()),
289+
rangeIntervals
290+
);
275291
}
276292
}

0 commit comments

Comments
 (0)