Skip to content

Commit 5150040

Browse files
authored
Implement matches() on SourceConfirmedTextQuery (#100252)
`match_only_text` does not currently support highlighting via the matches option of the default highlighter. This commit implements matches on the backing query for this field, and also fixes a bug where the field type's value fetcher could hold on to the wrong reference for a source lookup, causing threading errors.
1 parent 68974a2 commit 5150040

File tree

6 files changed

+251
-2
lines changed

6 files changed

+251
-2
lines changed

docs/changelog/100134.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 100134
2+
summary: Implement matches() on `SourceConfirmedTextQuery`
3+
area: Highlighting
4+
type: enhancement
5+
issues: []

docs/changelog/100252.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 100252
2+
summary: Implement matches() on `SourceConfirmedTextQuery`
3+
area: Highlighting
4+
type: bug
5+
issues: []
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.index.mapper;
10+
11+
import org.elasticsearch.action.bulk.BulkRequestBuilder;
12+
import org.elasticsearch.action.bulk.BulkResponse;
13+
import org.elasticsearch.action.search.SearchResponse;
14+
import org.elasticsearch.action.support.WriteRequest;
15+
import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
16+
import org.elasticsearch.index.query.QueryBuilders;
17+
import org.elasticsearch.plugins.Plugin;
18+
import org.elasticsearch.search.SearchHit;
19+
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
20+
import org.elasticsearch.test.ESIntegTestCase;
21+
import org.elasticsearch.xcontent.XContentBuilder;
22+
import org.elasticsearch.xcontent.XContentFactory;
23+
24+
import java.io.IOException;
25+
import java.util.Arrays;
26+
import java.util.Collection;
27+
28+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
29+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
30+
import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
31+
import static org.hamcrest.Matchers.containsString;
32+
33+
public class MatchOnlyTextMapperIT extends ESIntegTestCase {
34+
35+
protected Collection<Class<? extends Plugin>> nodePlugins() {
36+
return Arrays.asList(MapperExtrasPlugin.class);
37+
}
38+
39+
public void testHighlightingWithMatchOnlyTextFieldMatchPhrase() throws IOException {
40+
41+
// We index and retrieve a large number of documents to ensure that we go over multiple
42+
// segments, to ensure that the highlighter is using the correct segment lookups to
43+
// load the source.
44+
45+
XContentBuilder mappings = jsonBuilder();
46+
mappings.startObject().startObject("properties").startObject("message").field("type", "match_only_text").endObject().endObject();
47+
mappings.endObject();
48+
assertAcked(prepareCreate("test").setMapping(mappings));
49+
BulkRequestBuilder bulk = client().prepareBulk("test").setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
50+
for (int i = 0; i < 2000; i++) {
51+
bulk.add(
52+
client().prepareIndex()
53+
.setSource(
54+
XContentFactory.jsonBuilder()
55+
.startObject()
56+
.field(
57+
"message",
58+
"[.ds-.slm-history-5-2023.09.20-"
59+
+ randomInt()
60+
+ "][0] marking and sending shard failed due to [failed recovery]"
61+
)
62+
.endObject()
63+
)
64+
);
65+
}
66+
BulkResponse bulkItemResponses = bulk.get();
67+
assertNoFailures(bulkItemResponses);
68+
69+
SearchResponse searchResponse = client().prepareSearch("test")
70+
.setQuery(QueryBuilders.matchPhraseQuery("message", "marking and sending shard"))
71+
.setSize(500)
72+
.highlighter(new HighlightBuilder().field("message"))
73+
.get();
74+
assertNoFailures(searchResponse);
75+
for (SearchHit searchHit : searchResponse.getHits()) {
76+
assertThat(
77+
searchHit.getHighlightFields().get("message").fragments()[0].string(),
78+
containsString("<em>marking and sending shard</em>")
79+
);
80+
}
81+
}
82+
83+
public void testHighlightingWithMatchOnlyTextFieldSyntheticSource() throws IOException {
84+
85+
// We index and retrieve a large number of documents to ensure that we go over multiple
86+
// segments, to ensure that the highlighter is using the correct segment lookups to
87+
// load the source.
88+
89+
String mappings = """
90+
{ "_source" : { "mode" : "synthetic" },
91+
"properties" : {
92+
"message" : { "type" : "match_only_text" }
93+
}
94+
}
95+
""";
96+
assertAcked(prepareCreate("test").setMapping(mappings));
97+
BulkRequestBuilder bulk = client().prepareBulk("test").setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
98+
for (int i = 0; i < 2000; i++) {
99+
bulk.add(
100+
client().prepareIndex()
101+
.setSource(
102+
XContentFactory.jsonBuilder()
103+
.startObject()
104+
.field(
105+
"message",
106+
"[.ds-.slm-history-5-2023.09.20-"
107+
+ randomInt()
108+
+ "][0] marking and sending shard failed due to [failed recovery]"
109+
)
110+
.endObject()
111+
)
112+
);
113+
}
114+
BulkResponse bulkItemResponses = bulk.get();
115+
assertNoFailures(bulkItemResponses);
116+
117+
SearchResponse searchResponse = client().prepareSearch("test")
118+
.setQuery(QueryBuilders.matchPhraseQuery("message", "marking and sending shard"))
119+
.setSize(500)
120+
.highlighter(new HighlightBuilder().field("message"))
121+
.get();
122+
assertNoFailures(searchResponse);
123+
for (SearchHit searchHit : searchResponse.getHits()) {
124+
assertThat(
125+
searchHit.getHighlightFields().get("message").fragments()[0].string(),
126+
containsString("<em>marking and sending shard</em>")
127+
);
128+
}
129+
}
130+
131+
}

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,9 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
211211
};
212212
};
213213
}
214-
ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
215-
SourceProvider sourceProvider = searchExecutionContext.lookup();
216214
return context -> {
215+
ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
216+
SourceProvider sourceProvider = searchExecutionContext.lookup();
217217
valueFetcher.setNextReader(context);
218218
return docID -> {
219219
try {

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQuery.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
package org.elasticsearch.index.mapper.extras;
1010

1111
import org.apache.lucene.analysis.Analyzer;
12+
import org.apache.lucene.index.FieldInfo;
1213
import org.apache.lucene.index.FieldInvertState;
14+
import org.apache.lucene.index.IndexOptions;
1315
import org.apache.lucene.index.LeafReaderContext;
1416
import org.apache.lucene.index.Term;
1517
import org.apache.lucene.index.TermStates;
@@ -25,6 +27,7 @@
2527
import org.apache.lucene.search.LeafSimScorer;
2628
import org.apache.lucene.search.MatchAllDocsQuery;
2729
import org.apache.lucene.search.MatchNoDocsQuery;
30+
import org.apache.lucene.search.Matches;
2831
import org.apache.lucene.search.MultiPhraseQuery;
2932
import org.apache.lucene.search.PhraseQuery;
3033
import org.apache.lucene.search.PrefixQuery;
@@ -288,6 +291,25 @@ public RuntimePhraseScorer scorer(LeafReaderContext context) throws IOException
288291
return new RuntimePhraseScorer(this, approximation, leafSimScorer, valueFetcher, field, in);
289292
}
290293

294+
@Override
295+
public Matches matches(LeafReaderContext context, int doc) throws IOException {
296+
FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field);
297+
if (fi == null) {
298+
return null;
299+
}
300+
// Some highlighters will already have reindexed the source with positions and offsets,
301+
// so rather than doing it again we check to see if this data is available on the
302+
// current context and if so delegate directly to the inner query
303+
if (fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0) {
304+
Weight innerWeight = in.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1);
305+
return innerWeight.matches(context, doc);
306+
}
307+
RuntimePhraseScorer scorer = scorer(context);
308+
if (scorer == null || scorer.iterator().advance(doc) != doc) {
309+
return null;
310+
}
311+
return scorer.matches();
312+
}
291313
};
292314
}
293315

@@ -380,6 +402,20 @@ private float computeFreq() throws IOException {
380402
}
381403
return frequency;
382404
}
405+
406+
private Matches matches() throws IOException {
407+
MemoryIndex index = new MemoryIndex(true, false);
408+
List<Object> values = valueFetcher.apply(docID());
409+
for (Object value : values) {
410+
if (value == null) {
411+
continue;
412+
}
413+
index.addField(field, value.toString(), indexAnalyzer);
414+
}
415+
IndexSearcher searcher = index.createSearcher();
416+
Weight w = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1);
417+
return w.matches(searcher.getLeafContexts().get(0), 0);
418+
}
383419
}
384420

385421
}

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/SourceConfirmedTextQueryTests.java

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@
1010

1111
import org.apache.lucene.document.Document;
1212
import org.apache.lucene.document.Field.Store;
13+
import org.apache.lucene.document.KeywordField;
1314
import org.apache.lucene.document.TextField;
1415
import org.apache.lucene.index.DirectoryReader;
1516
import org.apache.lucene.index.IndexReader;
1617
import org.apache.lucene.index.IndexWriter;
1718
import org.apache.lucene.index.LeafReaderContext;
19+
import org.apache.lucene.index.ReaderUtil;
1820
import org.apache.lucene.index.Term;
1921
import org.apache.lucene.queries.spans.SpanNearQuery;
2022
import org.apache.lucene.queries.spans.SpanQuery;
@@ -23,12 +25,19 @@
2325
import org.apache.lucene.search.BooleanQuery;
2426
import org.apache.lucene.search.IndexSearcher;
2527
import org.apache.lucene.search.MatchNoDocsQuery;
28+
import org.apache.lucene.search.Matches;
29+
import org.apache.lucene.search.MatchesIterator;
2630
import org.apache.lucene.search.MultiPhraseQuery;
2731
import org.apache.lucene.search.PhraseQuery;
2832
import org.apache.lucene.search.PrefixQuery;
2933
import org.apache.lucene.search.Query;
3034
import org.apache.lucene.search.ScoreDoc;
35+
import org.apache.lucene.search.ScoreMode;
36+
import org.apache.lucene.search.Sort;
37+
import org.apache.lucene.search.SortedSetSelector;
3138
import org.apache.lucene.search.TermQuery;
39+
import org.apache.lucene.search.TopDocs;
40+
import org.apache.lucene.search.Weight;
3241
import org.apache.lucene.store.Directory;
3342
import org.apache.lucene.tests.search.CheckHits;
3443
import org.apache.lucene.util.IOFunction;
@@ -41,6 +50,8 @@
4150
import java.util.Collections;
4251
import java.util.List;
4352

53+
import static org.hamcrest.Matchers.greaterThan;
54+
4455
public class SourceConfirmedTextQueryTests extends ESTestCase {
4556

4657
private static final IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> SOURCE_FETCHER_PROVIDER =
@@ -428,4 +439,65 @@ public void testEmptyIndex() throws Exception {
428439
}
429440
}
430441

442+
public void testMatches() throws Exception {
443+
checkMatches(new TermQuery(new Term("body", "d")), "a b c d e", new int[] { 3, 3 });
444+
checkMatches(new PhraseQuery("body", "b", "c"), "a b c d c b c a", new int[] { 1, 2, 5, 6 });
445+
}
446+
447+
private static void checkMatches(Query query, String inputDoc, int[] expectedMatches) throws IOException {
448+
try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
449+
Document doc = new Document();
450+
doc.add(new TextField("body", "xxxxxnomatchxxxx", Store.YES));
451+
doc.add(new KeywordField("sort", "0", Store.NO));
452+
w.addDocument(doc);
453+
454+
doc = new Document();
455+
doc.add(new TextField("body", inputDoc, Store.YES));
456+
doc.add(new KeywordField("sort", "1", Store.NO));
457+
w.addDocument(doc);
458+
459+
doc = new Document();
460+
doc.add(new TextField("body", "xxxx " + inputDoc, Store.YES));
461+
doc.add(new KeywordField("sort", "2", Store.NO));
462+
w.addDocument(doc);
463+
464+
Query sourceConfirmedQuery = new SourceConfirmedTextQuery(query, SOURCE_FETCHER_PROVIDER, Lucene.STANDARD_ANALYZER);
465+
466+
try (IndexReader ir = DirectoryReader.open(w)) {
467+
468+
IndexSearcher searcher = new IndexSearcher(ir);
469+
TopDocs td = searcher.search(
470+
sourceConfirmedQuery,
471+
3,
472+
new Sort(KeywordField.newSortField("sort", false, SortedSetSelector.Type.MAX))
473+
);
474+
475+
Weight weight = searcher.createWeight(searcher.rewrite(sourceConfirmedQuery), ScoreMode.COMPLETE_NO_SCORES, 1);
476+
477+
int firstDoc = td.scoreDocs[0].doc;
478+
LeafReaderContext firstCtx = searcher.getLeafContexts().get(ReaderUtil.subIndex(firstDoc, searcher.getLeafContexts()));
479+
checkMatches(weight, firstCtx, firstDoc - firstCtx.docBase, expectedMatches, 0);
480+
481+
int secondDoc = td.scoreDocs[1].doc;
482+
LeafReaderContext secondCtx = searcher.getLeafContexts().get(ReaderUtil.subIndex(secondDoc, searcher.getLeafContexts()));
483+
checkMatches(weight, secondCtx, secondDoc - secondCtx.docBase, expectedMatches, 1);
484+
485+
}
486+
}
487+
}
488+
489+
private static void checkMatches(Weight w, LeafReaderContext ctx, int doc, int[] expectedMatches, int offset) throws IOException {
490+
Matches matches = w.matches(ctx, doc);
491+
assertNotNull(matches);
492+
MatchesIterator mi = matches.getMatches("body");
493+
int i = 0;
494+
while (mi.next()) {
495+
assertThat(expectedMatches.length, greaterThan(i + 1));
496+
assertEquals(mi.startPosition(), expectedMatches[i] + offset);
497+
assertEquals(mi.endPosition(), expectedMatches[i + 1] + offset);
498+
i += 2;
499+
}
500+
assertEquals(expectedMatches.length, i);
501+
}
502+
431503
}

0 commit comments

Comments
 (0)