Skip to content

Commit e3184cb

Browse files
authored
Add PhraseQuery.Builder.setMaxTerms() method to limit the maximum number of terms and avoid excessive memory use (#15332)
* perf: Added configurable limit for PhraseQuery#builder terms to solve the problem of excessive memory usage in ultra-long text search case
* perf: Added configurable term threshold for PhraseQuery#Builder to solve the problem of excessive memory usage in ultra-long text search case
* perf: Added configurable term threshold for PhraseQuery#Builder to solve the problem of excessive memory usage in ultra-long text search case
* perf: Added configurable term threshold for PhraseQuery#Builder to solve the problem of excessive memory usage in ultra-long text search case
* add changes entry
* Add PhraseQuery.Builder.setMaxTerms() method to limit the maximum number of terms and excessive memory use.
* Optimizing test cases: testPhraseQueryMaxTerms#testPhraseQueryTermLimit
---------
Co-authored-by: nickyulin <[email protected]>
1 parent 71e822e commit e3184cb

File tree

3 files changed

+37
-0
lines changed

3 files changed

+37
-0
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ Improvements
174174

175175
* GITHUB#15184: Refactoring internal HNSWGraphBuilder's APIs and avoid creating new scorer for each call (Patrick Zhai)
176176

177+
* GITHUB#15332: Add PhraseQuery.Builder.setMaxTerms() method to limit the maximum number of terms and avoid excessive memory use (linyunanit)
178+
177179
Optimizations
178180
---------------------
179181
* GITHUB#15140: Optimize TopScoreDocCollector with TernaryLongHeap for improved performance over Binary-LongHeap. (Ramakrishna Chilaka)

lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,14 @@ public class PhraseQuery extends Query {
7474
public static class Builder {
7575

7676
private int slop;
77+
private int maxTerms;
7778
private final List<Term> terms;
7879
private final IntArrayList positions;
7980

8081
/** Sole constructor. */
8182
public Builder() {
8283
slop = 0;
84+
maxTerms = -1;
8385
terms = new ArrayList<>();
8486
positions = new IntArrayList();
8587
}
@@ -94,6 +96,18 @@ public Builder setSlop(int slop) {
9496
return this;
9597
}
9698

99+
/**
100+
* Set the maximum number of terms allowed in the phrase query. This helps prevent excessive
101+
* memory usage for very long phrases.
102+
*
103+
* <p>If the number of terms added via {@link #add(Term)} or {@link #add(Term, int)} exceeds
104+
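* this threshold, an {@link IllegalArgumentException} will be thrown. The limit is only enforced
* when {@code maxTerms} is greater than zero; the default of {@code -1} means no limit.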
105+
*/
106+
public Builder setMaxTerms(int maxTerms) {
107+
this.maxTerms = maxTerms;
108+
return this;
109+
}
110+
97111
/**
98112
* Adds a term to the end of the query phrase. The relative position of the term is the one
99113
* immediately after the last term added.
@@ -128,6 +142,13 @@ public Builder add(Term term, int position) {
128142
+ " and "
129143
+ terms.get(0).field());
130144
}
145+
if (maxTerms > 0 && terms.size() >= maxTerms) {
146+
throw new IllegalArgumentException(
147+
"The current number of terms is "
148+
+ terms.size()
149+
+ ", which exceeds the limit of "
150+
+ maxTerms);
151+
}
131152
terms.add(term);
132153
positions.add(position);
133154
return this;

lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,20 @@ public void testBackwardPositions() throws Exception {
743743
});
744744
}
745745

746+
public void testPhraseQueryMaxTerms() throws Exception {
747+
PhraseQuery.Builder builder = new PhraseQuery.Builder();
748+
int termThreshold = 5;
749+
builder.setMaxTerms(termThreshold);
750+
for (int i = 0; i < termThreshold; i++) {
751+
builder.add(new Term("field", "one" + i), i);
752+
}
753+
expectThrows(
754+
IllegalArgumentException.class,
755+
() -> {
756+
builder.add(new Term("field", "three"), termThreshold);
757+
});
758+
}
759+
746760
private static final String[] DOCS =
747761
new String[] {
748762
"a b c d e f g h",

0 commit comments

Comments
 (0)