Skip to content

Commit 3e51698

Browse files
authored
Fix termStats posting usage (#114644) (#114710)
1 parent cbf46a3 commit 3e51698

File tree

2 files changed

+36
-41
lines changed

2 files changed

+36
-41
lines changed

server/src/main/java/org/elasticsearch/script/ScriptTermStats.java

Lines changed: 27 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@
1212
import org.apache.lucene.index.LeafReaderContext;
1313
import org.apache.lucene.index.PostingsEnum;
1414
import org.apache.lucene.index.Term;
15-
import org.apache.lucene.index.TermState;
1615
import org.apache.lucene.index.TermStates;
17-
import org.apache.lucene.index.TermsEnum;
16+
import org.apache.lucene.search.DocIdSetIterator;
1817
import org.apache.lucene.search.IndexSearcher;
1918
import org.elasticsearch.common.util.CachedSupplier;
2019
import org.elasticsearch.features.NodeFeature;
@@ -71,17 +70,15 @@ public int uniqueTermsCount() {
7170
public int matchedTermsCount() {
7271
final int docId = docIdSupplier.getAsInt();
7372
int matchedTerms = 0;
73+
advancePostings(docId);
7474

75-
try {
76-
for (PostingsEnum postingsEnum : postingsSupplier.get()) {
77-
if (postingsEnum != null && postingsEnum.advance(docId) == docId && postingsEnum.freq() > 0) {
78-
matchedTerms++;
79-
}
75+
for (PostingsEnum postingsEnum : postingsSupplier.get()) {
76+
if (postingsEnum != null && postingsEnum.docID() == docId) {
77+
matchedTerms++;
8078
}
81-
return matchedTerms;
82-
} catch (IOException e) {
83-
throw new UncheckedIOException(e);
8479
}
80+
81+
return matchedTerms;
8582
}
8683

8784
/**
@@ -150,8 +147,9 @@ public StatsSummary termFreq() {
150147
final int docId = docIdSupplier.getAsInt();
151148

152149
try {
150+
advancePostings(docId);
153151
for (PostingsEnum postingsEnum : postingsSupplier.get()) {
154-
if (postingsEnum == null || postingsEnum.advance(docId) != docId) {
152+
if (postingsEnum == null || postingsEnum.docID() != docId) {
155153
statsSummary.accept(0);
156154
} else {
157155
statsSummary.accept(postingsEnum.freq());
@@ -170,12 +168,13 @@ public StatsSummary termFreq() {
170168
* @return statistics on termPositions for the terms of the query in the current doc
171169
*/
172170
public StatsSummary termPositions() {
173-
try {
174-
statsSummary.reset();
175-
int docId = docIdSupplier.getAsInt();
171+
statsSummary.reset();
172+
int docId = docIdSupplier.getAsInt();
176173

174+
try {
175+
advancePostings(docId);
177176
for (PostingsEnum postingsEnum : postingsSupplier.get()) {
178-
if (postingsEnum == null || postingsEnum.advance(docId) != docId) {
177+
if (postingsEnum == null || postingsEnum.docID() != docId) {
179178
continue;
180179
}
181180
for (int i = 0; i < postingsEnum.freq(); i++) {
@@ -206,30 +205,26 @@ private TermStates[] loadTermContexts() {
206205
private PostingsEnum[] loadPostings() {
207206
try {
208207
PostingsEnum[] postings = new PostingsEnum[terms.length];
209-
TermStates[] contexts = termContextsSupplier.get();
210208

211209
for (int i = 0; i < terms.length; i++) {
212-
TermStates termStates = contexts[i];
213-
if (termStates.docFreq() == 0) {
214-
postings[i] = null;
215-
continue;
216-
}
217-
218-
TermState state = termStates.get(leafReaderContext);
219-
if (state == null) {
220-
postings[i] = null;
221-
continue;
222-
}
223-
224-
TermsEnum termsEnum = leafReaderContext.reader().terms(terms[i].field()).iterator();
225-
termsEnum.seekExact(terms[i].bytes(), state);
226-
227-
postings[i] = termsEnum.postings(null, PostingsEnum.ALL);
210+
postings[i] = leafReaderContext.reader().postings(terms[i], PostingsEnum.POSITIONS);
228211
}
229212

230213
return postings;
231214
} catch (IOException e) {
232215
throw new UncheckedIOException(e);
233216
}
234217
}
218+
219+
private void advancePostings(int targetDocId) {
220+
try {
221+
for (PostingsEnum posting : postingsSupplier.get()) {
222+
if (posting != null && posting.docID() < targetDocId && posting.docID() != DocIdSetIterator.NO_MORE_DOCS) {
223+
posting.advance(targetDocId);
224+
}
225+
}
226+
} catch (IOException e) {
227+
throw new UncheckedIOException(e);
228+
}
229+
}
235230
}

server/src/test/java/org/elasticsearch/script/ScriptTermStatsTests.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ public void testMatchedTermsCount() throws IOException {
4848

4949
// Partial match
5050
assertAllDocs(
51-
Set.of(new Term("field", "foo"), new Term("field", "baz")),
51+
Set.of(new Term("field", "foo"), new Term("field", "qux"), new Term("field", "baz")),
5252
ScriptTermStats::matchedTermsCount,
53-
Map.of("doc-1", equalTo(1), "doc-2", equalTo(1), "doc-3", equalTo(0))
53+
Map.of("doc-1", equalTo(2), "doc-2", equalTo(1), "doc-3", equalTo(0))
5454
);
5555

5656
// Always returns 0 when no term is provided.
@@ -211,12 +211,12 @@ public void testTermFreq() throws IOException {
211211
// With missing terms
212212
{
213213
assertAllDocs(
214-
Set.of(new Term("field", "foo"), new Term("field", "baz")),
214+
Set.of(new Term("field", "foo"), new Term("field", "qux"), new Term("field", "baz")),
215215
ScriptTermStats::termFreq,
216216
Map.ofEntries(
217-
Map.entry("doc-1", equalTo(new StatsSummary(2, 1, 0, 1))),
218-
Map.entry("doc-2", equalTo(new StatsSummary(2, 2, 0, 2))),
219-
Map.entry("doc-3", equalTo(new StatsSummary(2, 0, 0, 0)))
217+
Map.entry("doc-1", equalTo(new StatsSummary(3, 2, 0, 1))),
218+
Map.entry("doc-2", equalTo(new StatsSummary(3, 2, 0, 2))),
219+
Map.entry("doc-3", equalTo(new StatsSummary(3, 0, 0, 0)))
220220
)
221221
);
222222
}
@@ -274,10 +274,10 @@ public void testTermPositions() throws IOException {
274274
// With missing terms
275275
{
276276
assertAllDocs(
277-
Set.of(new Term("field", "foo"), new Term("field", "baz")),
277+
Set.of(new Term("field", "foo"), new Term("field", "qux"), new Term("field", "baz")),
278278
ScriptTermStats::termPositions,
279279
Map.ofEntries(
280-
Map.entry("doc-1", equalTo(new StatsSummary(1, 1, 1, 1))),
280+
Map.entry("doc-1", equalTo(new StatsSummary(2, 4, 1, 3))),
281281
Map.entry("doc-2", equalTo(new StatsSummary(2, 3, 1, 2))),
282282
Map.entry("doc-3", equalTo(new StatsSummary()))
283283
)
@@ -311,7 +311,7 @@ private void withIndexSearcher(CheckedConsumer<IndexSearcher, IOException> consu
311311

312312
Document doc = new Document();
313313
doc.add(new TextField("id", "doc-1", Field.Store.YES));
314-
doc.add(new TextField("field", "foo bar", Field.Store.YES));
314+
doc.add(new TextField("field", "foo bar qux", Field.Store.YES));
315315
w.addDocument(doc);
316316

317317
doc = new Document();

0 commit comments

Comments
 (0)