Skip to content

Commit f7a3587

Browse files
authored
LUCENE-9940: DisjunctionMaxQuery shouldn't depend on disjunct order for equals checks (#110)
DisjunctionMaxQuery stores its disjuncts in a Query[], and uses Arrays.equals() for comparisons in its equals() implementation. This means that the order in which disjuncts are added to the query matters for equality checks. This commit changes DMQ to instead store its disjuncts in a Multiset, meaning that ordering no longer matters. The getDisjuncts() method now returns a Collection<Query> rather than a List, and some tests are changed to use query equality checks rather than iterating over disjuncts and expecting a particular order.
1 parent 043ed3a commit f7a3587

File tree

4 files changed

+41
-25
lines changed

4 files changed

+41
-25
lines changed

lucene/CHANGES.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ API Changes
109109
only applicable for fields that are indexed with doc values only. (Mayya Sharipova,
110110
Adrien Grand, Simon Willnauer)
111111

112-
113112
Improvements
114113

115114
* LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words,
@@ -246,6 +245,9 @@ Bug fixes
246245
* LUCENE-9930: The Ukrainian analyzer was reloading its dictionary for every new
247246
TokenStreamComponents, which could lead to memory leaks. (Alan Woodward)
248247

248+
* LUCENE-9940: The order of disjuncts in DisjunctionMaxQuery does not matter
249+
for equality checks (Alan Woodward)
250+
249251
Changes in Backwards Compatibility Policy
250252

251253
* LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top

lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
import java.io.IOException;
2020
import java.util.ArrayList;
21-
import java.util.Arrays;
2221
import java.util.Collection;
2322
import java.util.Collections;
2423
import java.util.Iterator;
@@ -45,7 +44,7 @@
4544
public final class DisjunctionMaxQuery extends Query implements Iterable<Query> {
4645

4746
/* The subqueries */
48-
private final Query[] disjuncts;
47+
private final Multiset<Query> disjuncts = new Multiset<>();
4948

5049
/* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */
5150
private final float tieBreakerMultiplier;
@@ -66,7 +65,7 @@ public DisjunctionMaxQuery(Collection<Query> disjuncts, float tieBreakerMultipli
6665
throw new IllegalArgumentException("tieBreakerMultiplier must be in [0, 1]");
6766
}
6867
this.tieBreakerMultiplier = tieBreakerMultiplier;
69-
this.disjuncts = disjuncts.toArray(new Query[disjuncts.size()]);
68+
this.disjuncts.addAll(disjuncts);
7069
}
7170

7271
/** @return An {@code Iterator<Query>} over the disjuncts */
@@ -76,8 +75,8 @@ public Iterator<Query> iterator() {
7675
}
7776

7877
/** @return the disjuncts. */
79-
public List<Query> getDisjuncts() {
80-
return Collections.unmodifiableList(Arrays.asList(disjuncts));
78+
public Collection<Query> getDisjuncts() {
79+
return Collections.unmodifiableCollection(disjuncts);
8180
}
8281

8382
/** @return tie breaker value for multiple matches. */
@@ -208,8 +207,8 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
208207
*/
209208
@Override
210209
public Query rewrite(IndexReader reader) throws IOException {
211-
if (disjuncts.length == 1) {
212-
return disjuncts[0];
210+
if (disjuncts.size() == 1) {
211+
return disjuncts.iterator().next();
213212
}
214213

215214
if (tieBreakerMultiplier == 1.0f) {
@@ -254,14 +253,15 @@ public void visit(QueryVisitor visitor) {
254253
public String toString(String field) {
255254
StringBuilder buffer = new StringBuilder();
256255
buffer.append("(");
257-
for (int i = 0; i < disjuncts.length; i++) {
258-
Query subquery = disjuncts[i];
256+
Iterator<Query> it = disjuncts.iterator();
257+
for (int i = 0; it.hasNext(); i++) {
258+
Query subquery = it.next();
259259
if (subquery instanceof BooleanQuery) { // wrap sub-bools in parens
260260
buffer.append("(");
261261
buffer.append(subquery.toString(field));
262262
buffer.append(")");
263263
} else buffer.append(subquery.toString(field));
264-
if (i != disjuncts.length - 1) buffer.append(" | ");
264+
if (i != disjuncts.size() - 1) buffer.append(" | ");
265265
}
266266
buffer.append(")");
267267
if (tieBreakerMultiplier != 0.0f) {
@@ -285,7 +285,7 @@ public boolean equals(Object other) {
285285

286286
private boolean equalsTo(DisjunctionMaxQuery other) {
287287
return tieBreakerMultiplier == other.tieBreakerMultiplier
288-
&& Arrays.equals(disjuncts, other.disjuncts);
288+
&& Objects.equals(disjuncts, other.disjuncts);
289289
}
290290

291291
/**
@@ -297,7 +297,7 @@ private boolean equalsTo(DisjunctionMaxQuery other) {
297297
public int hashCode() {
298298
int h = classHash();
299299
h = 31 * h + Float.floatToIntBits(tieBreakerMultiplier);
300-
h = 31 * h + Arrays.hashCode(disjuncts);
300+
h = 31 * h + Objects.hashCode(disjuncts);
301301
return h;
302302
}
303303
}

lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -496,11 +496,21 @@ public void testRewriteBoolean() throws Exception {
496496
Query sub2 = tq("hed", "elephant");
497497
DisjunctionMaxQuery q = new DisjunctionMaxQuery(Arrays.asList(sub1, sub2), 1.0f);
498498
Query rewritten = s.rewrite(q);
499-
assertTrue(rewritten instanceof BooleanQuery);
500-
BooleanQuery bq = (BooleanQuery) rewritten;
501-
assertEquals(bq.clauses().size(), 2);
502-
assertEquals(bq.clauses().get(0), new BooleanClause(sub1, BooleanClause.Occur.SHOULD));
503-
assertEquals(bq.clauses().get(1), new BooleanClause(sub2, BooleanClause.Occur.SHOULD));
499+
Query expected =
500+
new BooleanQuery.Builder()
501+
.add(sub1, BooleanClause.Occur.SHOULD)
502+
.add(sub2, BooleanClause.Occur.SHOULD)
503+
.build();
504+
assertEquals(expected, rewritten);
505+
}
506+
507+
public void testDisjunctOrderAndEquals() throws Exception {
508+
// the order that disjuncts are provided in should not matter for equals() comparisons
509+
Query sub1 = tq("hed", "albino");
510+
Query sub2 = tq("hed", "elephant");
511+
Query q1 = new DisjunctionMaxQuery(Arrays.asList(sub1, sub2), 1.0f);
512+
Query q2 = new DisjunctionMaxQuery(Arrays.asList(sub2, sub1), 1.0f);
513+
assertEquals(q1, q2);
504514
}
505515

506516
public void testRandomTopDocs() throws Exception {

lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,19 @@
1818

1919
import java.io.IOException;
2020
import java.io.InputStream;
21+
import java.util.Arrays;
2122
import org.apache.lucene.analysis.Analyzer;
2223
import org.apache.lucene.analysis.MockAnalyzer;
2324
import org.apache.lucene.analysis.MockTokenFilter;
2425
import org.apache.lucene.analysis.MockTokenizer;
2526
import org.apache.lucene.document.Document;
2627
import org.apache.lucene.index.IndexReader;
28+
import org.apache.lucene.index.Term;
2729
import org.apache.lucene.search.DisjunctionMaxQuery;
2830
import org.apache.lucene.search.IndexSearcher;
2931
import org.apache.lucene.search.Query;
3032
import org.apache.lucene.search.ScoreDoc;
33+
import org.apache.lucene.search.TermQuery;
3134
import org.apache.lucene.search.TopDocs;
3235
import org.apache.lucene.search.spans.SpanQuery;
3336
import org.apache.lucene.util.LuceneTestCase;
@@ -99,13 +102,14 @@ public void testBooleanQueryXML() throws ParserException, IOException {
99102

100103
public void testDisjunctionMaxQueryXML() throws ParserException, IOException {
101104
Query q = parse("DisjunctionMaxQuery.xml");
102-
assertTrue(q instanceof DisjunctionMaxQuery);
103-
DisjunctionMaxQuery d = (DisjunctionMaxQuery) q;
104-
assertEquals(0.0f, d.getTieBreakerMultiplier(), 0.0001f);
105-
assertEquals(2, d.getDisjuncts().size());
106-
DisjunctionMaxQuery ndq = (DisjunctionMaxQuery) d.getDisjuncts().get(1);
107-
assertEquals(0.3f, ndq.getTieBreakerMultiplier(), 0.0001f);
108-
assertEquals(1, ndq.getDisjuncts().size());
105+
Query expected =
106+
new DisjunctionMaxQuery(
107+
Arrays.asList(
108+
new TermQuery(new Term("a", "merger")),
109+
new DisjunctionMaxQuery(
110+
Arrays.asList(new TermQuery(new Term("b", "verger"))), 0.3f)),
111+
0.0f);
112+
assertEquals(expected, q);
109113
}
110114

111115
public void testRangeQueryXML() throws ParserException, IOException {

0 commit comments

Comments
 (0)