Skip to content

Commit 0c97682

Browse files
javannajpountz
authored andcommitted
LUCENE-10395: Introduce TotalHitCountCollectorManager (#622)
1 parent e3b2efe commit 0c97682

File tree

8 files changed

+141
-38
lines changed

8 files changed

+141
-38
lines changed

lucene/CHANGES.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ New Features
8686

8787
* LUCENE-10250: Add support for arbitrary length hierarchical SSDV facets. (Marc D'mello)
8888

89+
* LUCENE-10395: Add support for TotalHitCountCollectorManager, a collector manager
90+
based on TotalHitCountCollector that allows users to parallelize counting the
91+
number of hits. (Luca Cavanna, Adrien Grand)
92+
8993
Improvements
9094
---------------------
9195

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.search;
18+
19+
import java.io.IOException;
20+
import java.util.Collection;
21+
22+
/**
23+
* Collector manager based on {@link TotalHitCountCollector} that allows users to parallelize
24+
* counting the number of hits, expected to be used mostly wrapped in {@link MultiCollectorManager}.
25+
* For cases when this is the only collector manager used, {@link IndexSearcher#count(Query)} should
26+
* be called instead of {@link IndexSearcher#search(Query, CollectorManager)} as the former is
27+
* faster whenever the count can be returned directly from the index statistics.
28+
*/
29+
public class TotalHitCountCollectorManager
30+
implements CollectorManager<TotalHitCountCollector, Integer> {
31+
@Override
32+
public TotalHitCountCollector newCollector() throws IOException {
33+
return new TotalHitCountCollector();
34+
}
35+
36+
@Override
37+
public Integer reduce(Collection<TotalHitCountCollector> collectors) throws IOException {
38+
int totalHits = 0;
39+
for (TotalHitCountCollector collector : collectors) {
40+
totalHits += collector.getTotalHits();
41+
}
42+
return totalHits;
43+
}
44+
}

lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import java.util.concurrent.atomic.AtomicInteger;
4040
import java.util.concurrent.atomic.AtomicLong;
4141
import java.util.concurrent.atomic.AtomicReference;
42+
import java.util.stream.Collectors;
4243
import org.apache.lucene.document.Document;
4344
import org.apache.lucene.document.Field.Store;
4445
import org.apache.lucene.document.LongPoint;
@@ -167,19 +168,36 @@ public void run() {
167168
RandomPicks.randomFrom(
168169
random(), new String[] {"blue", "red", "yellow", "green"});
169170
final Query q = new TermQuery(new Term("color", value));
170-
TotalHitCountCollector collector = new TotalHitCountCollector();
171-
searcher.search(q, collector); // will use the cache
172-
final int totalHits1 = collector.getTotalHits();
173-
TotalHitCountCollector collector2 = new TotalHitCountCollector();
174-
searcher.search(
175-
q,
176-
new FilterCollector(collector2) {
177-
@Override
178-
public ScoreMode scoreMode() {
179-
return ScoreMode.COMPLETE; // will not use the cache because of scores
180-
}
181-
});
182-
final long totalHits2 = collector2.getTotalHits();
171+
TotalHitCountCollectorManager collectorManager =
172+
new TotalHitCountCollectorManager();
173+
// will use the cache
174+
final int totalHits1 = searcher.search(q, collectorManager);
175+
final long totalHits2 =
176+
searcher.search(
177+
q,
178+
new CollectorManager<FilterCollector, Integer>() {
179+
@Override
180+
public FilterCollector newCollector() {
181+
return new FilterCollector(new TotalHitCountCollector()) {
182+
@Override
183+
public ScoreMode scoreMode() {
184+
// will not use the cache because of scores
185+
return ScoreMode.COMPLETE;
186+
}
187+
};
188+
}
189+
190+
@Override
191+
public Integer reduce(Collection<FilterCollector> collectors)
192+
throws IOException {
193+
return collectorManager.reduce(
194+
collectors.stream()
195+
.map(
196+
filterCollector ->
197+
(TotalHitCountCollector) filterCollector.in)
198+
.collect(Collectors.toList()));
199+
}
200+
});
183201
assertEquals(totalHits2, totalHits1);
184202
} finally {
185203
mgr.release(searcher);
@@ -945,7 +963,7 @@ public void onUse(Query query) {
945963

946964
searcher.setQueryCache(queryCache);
947965
searcher.setQueryCachingPolicy(policy);
948-
searcher.search(query.build(), new TotalHitCountCollector());
966+
searcher.search(query.build(), new TotalHitCountCollectorManager());
949967

950968
reader.close();
951969
dir.close();
@@ -1174,7 +1192,7 @@ public void testDetectMutatedQueries() throws IOException {
11741192

11751193
try {
11761194
// trigger an eviction
1177-
searcher.search(new MatchAllDocsQuery(), new TotalHitCountCollector());
1195+
searcher.search(new MatchAllDocsQuery(), new TotalHitCountCollectorManager());
11781196
fail();
11791197
} catch (
11801198
@SuppressWarnings("unused")

lucene/core/src/test/org/apache/lucene/search/TestSearchWithThreads.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,23 +57,20 @@ public void test() throws Exception {
5757

5858
final AtomicBoolean failed = new AtomicBoolean();
5959
final AtomicLong netSearch = new AtomicLong();
60-
60+
TotalHitCountCollectorManager collectorManager = new TotalHitCountCollectorManager();
6161
Thread[] threads = new Thread[numThreads];
6262
for (int threadID = 0; threadID < numThreads; threadID++) {
6363
threads[threadID] =
6464
new Thread() {
65-
TotalHitCountCollector col = new TotalHitCountCollector();
6665

6766
@Override
6867
public void run() {
6968
try {
7069
long totHits = 0;
7170
long totSearch = 0;
7271
for (; totSearch < numSearches & !failed.get(); totSearch++) {
73-
s.search(new TermQuery(new Term("body", "aaa")), col);
74-
totHits += col.getTotalHits();
75-
s.search(new TermQuery(new Term("body", "bbb")), col);
76-
totHits += col.getTotalHits();
72+
totHits += s.search(new TermQuery(new Term("body", "aaa")), collectorManager);
73+
totHits += s.search(new TermQuery(new Term("body", "bbb")), collectorManager);
7774
}
7875
assertTrue(totSearch > 0 && totHits > 0);
7976
netSearch.addAndGet(totSearch);

lucene/core/src/test/org/apache/lucene/search/TestTermQuery.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,15 @@ public void testCreateWeightDoesNotSeekIfScoresAreNotNeeded() throws IOException
9191
IndexSearcher searcher = new IndexSearcher(reader);
9292
// use a collector rather than searcher.count() which would just read the
9393
// doc freq instead of creating a scorer
94-
TotalHitCountCollector collector = new TotalHitCountCollector();
95-
searcher.search(query, collector);
96-
assertEquals(1, collector.getTotalHits());
94+
TotalHitCountCollectorManager collectorManager = new TotalHitCountCollectorManager();
95+
int totalHits = searcher.search(query, collectorManager);
96+
assertEquals(1, totalHits);
9797
TermQuery queryWithContext =
9898
new TermQuery(
9999
new Term("foo", "bar"),
100100
TermStates.build(reader.getContext(), new Term("foo", "bar"), true));
101-
collector = new TotalHitCountCollector();
102-
searcher.search(queryWithContext, collector);
103-
assertEquals(1, collector.getTotalHits());
101+
totalHits = searcher.search(queryWithContext, collectorManager);
102+
assertEquals(1, totalHits);
104103

105104
IOUtils.close(reader, w, dir);
106105
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.search;
18+
19+
import org.apache.lucene.document.Document;
20+
import org.apache.lucene.document.Field;
21+
import org.apache.lucene.document.StringField;
22+
import org.apache.lucene.index.IndexReader;
23+
import org.apache.lucene.store.Directory;
24+
import org.apache.lucene.tests.index.RandomIndexWriter;
25+
import org.apache.lucene.tests.util.LuceneTestCase;
26+
27+
public class TestTotalHitCountCollectorManager extends LuceneTestCase {
28+
29+
public void testBasics() throws Exception {
30+
Directory indexStore = newDirectory();
31+
RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
32+
for (int i = 0; i < 5; i++) {
33+
Document doc = new Document();
34+
doc.add(new StringField("string", "a" + i, Field.Store.NO));
35+
doc.add(new StringField("string", "b" + i, Field.Store.NO));
36+
writer.addDocument(doc);
37+
}
38+
IndexReader reader = writer.getReader();
39+
writer.close();
40+
41+
IndexSearcher searcher = newSearcher(reader, true, true, true);
42+
TotalHitCountCollectorManager collectorManager = new TotalHitCountCollectorManager();
43+
int totalHits = searcher.search(new MatchAllDocsQuery(), collectorManager);
44+
assertEquals(5, totalHits);
45+
46+
reader.close();
47+
indexStore.close();
48+
}
49+
}

lucene/misc/src/test/org/apache/lucene/misc/search/TestDocValuesStatsCollector.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@
4545
import org.apache.lucene.misc.search.DocValuesStats.SortedSetDocValuesStats;
4646
import org.apache.lucene.search.IndexSearcher;
4747
import org.apache.lucene.search.MatchAllDocsQuery;
48-
import org.apache.lucene.search.MultiCollector;
49-
import org.apache.lucene.search.TotalHitCountCollector;
5048
import org.apache.lucene.store.Directory;
5149
import org.apache.lucene.tests.util.LuceneTestCase;
5250
import org.apache.lucene.tests.util.TestUtil;
@@ -396,10 +394,8 @@ public void testDocsWithSortedSetValues() throws IOException {
396394
try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
397395
IndexSearcher searcher = new IndexSearcher(reader);
398396
SortedSetDocValuesStats stats = new SortedSetDocValuesStats(field);
399-
TotalHitCountCollector totalHitCount = new TotalHitCountCollector();
400-
searcher.search(
401-
new MatchAllDocsQuery(),
402-
MultiCollector.wrap(totalHitCount, new DocValuesStatsCollector(stats)));
397+
398+
searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
403399

404400
int expCount = (int) nonNull(docValues).count();
405401
assertEquals(expCount, stats.count());

lucene/misc/src/test/org/apache/lucene/misc/search/TestMemoryAccountingBitsetCollector.java

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@
2424
import org.apache.lucene.misc.CollectorMemoryTracker;
2525
import org.apache.lucene.search.IndexSearcher;
2626
import org.apache.lucene.search.MatchAllDocsQuery;
27-
import org.apache.lucene.search.MultiCollector;
28-
import org.apache.lucene.search.TotalHitCountCollector;
2927
import org.apache.lucene.store.Directory;
3028
import org.apache.lucene.tests.index.RandomIndexWriter;
3129
import org.apache.lucene.tests.util.LuceneTestCase;
@@ -64,14 +62,12 @@ public void testMemoryAccountingBitsetCollectorMemoryLimit() {
6462
CollectorMemoryTracker tracker =
6563
new CollectorMemoryTracker("testMemoryTracker", perCollectorMemoryLimit);
6664
MemoryAccountingBitsetCollector bitSetCollector = new MemoryAccountingBitsetCollector(tracker);
67-
TotalHitCountCollector hitCountCollector = new TotalHitCountCollector();
6865

6966
IndexSearcher searcher = new IndexSearcher(reader);
7067
expectThrows(
7168
IllegalStateException.class,
7269
() -> {
73-
searcher.search(
74-
new MatchAllDocsQuery(), MultiCollector.wrap(hitCountCollector, bitSetCollector));
70+
searcher.search(new MatchAllDocsQuery(), bitSetCollector);
7571
});
7672
}
7773
}

0 commit comments

Comments
 (0)