Skip to content

Commit 3e40232

Browse files
committed
more tests; javadoc; tidy up
1 parent a591867 commit 3e40232

File tree

7 files changed

+137
-60
lines changed

7 files changed

+137
-60
lines changed

server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
import java.util.List;
6565
import java.util.Locale;
6666
import java.util.Map;
67+
import java.util.Objects;
6768

6869
import static java.util.Collections.singletonList;
6970
import static java.util.Collections.singletonMap;
@@ -106,7 +107,7 @@
106107

107108
public class HighlighterSearchIT extends ESIntegTestCase {
108109
// TODO as we move analyzers out of the core we need to move some of these into HighlighterWithAnalyzersTests
109-
private static final String[] ALL_TYPES = new String[] { "plain", "fvh", "unified" };
110+
private static final String[] ALL_TYPES = new String[] { "plain", "fvh", "unified", "matches" };
110111

111112
@Override
112113
protected Collection<Class<? extends Plugin>> nodePlugins() {
@@ -3508,6 +3509,9 @@ public void testWithNestedQuery() throws Exception {
35083509
// but we highlight the root text field since nested documents cannot be highlighted with postings nor term vectors
35093510
// directly.
35103511
for (String type : ALL_TYPES) {
3512+
if (Objects.equals("matches", type)) {
3513+
continue; // matches highlighter doesn't support nested fields
3514+
}
35113515
SearchResponse searchResponse = client().prepareSearch()
35123516
.setQuery(nestedQuery("foo", prefixQuery("foo.text", "bro"), ScoreMode.None))
35133517
.highlighter(new HighlightBuilder().field(new Field("text").highlighterType(type).requireFieldMatch(false)))

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,7 @@ private FieldContext contextBuilders(
160160
)
161161
);
162162
}
163-
// TODO in future we can load the storedFields in advance here and make use of them,
164-
// but for now they are loaded separately in HighlightUtils so we only return whether
165-
// or not we need source.
166-
storedFieldsSpec = storedFieldsSpec.merge(new StoredFieldsSpec(sourceRequired, false, Set.of()));
163+
storedFieldsSpec = storedFieldsSpec.merge(new StoredFieldsSpec(sourceRequired, false, storedFields));
167164
}
168165
return new FieldContext(storedFieldsSpec, builders);
169166
}

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,8 @@ public static List<Object> loadFieldValues(
4343
FetchSubPhase.HitContext hitContext,
4444
boolean forceSource
4545
) throws IOException {
46-
if (forceSource == false && fieldType.isStored()) {
47-
CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(singleton(fieldType.name()), false);
48-
hitContext.reader().document(hitContext.docId(), fieldVisitor);
49-
List<Object> textsToHighlight = fieldVisitor.fields().get(fieldType.name());
50-
return Objects.requireNonNullElse(textsToHighlight, Collections.emptyList());
46+
if (forceSource == false && hitContext.loadedFields().containsKey(fieldType.name())) {
47+
return hitContext.loadedFields().get(fieldType.name());
5148
}
5249
ValueFetcher fetcher = fieldType.valueFetcher(searchContext, null);
5350
fetcher.setNextReader(hitContext.readerContext());

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/MatchesFieldHighlighter.java

Lines changed: 50 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -9,61 +9,68 @@
99
package org.elasticsearch.search.fetch.subphase.highlight;
1010

1111
import org.apache.lucene.analysis.Analyzer;
12-
import org.apache.lucene.analysis.AnalyzerWrapper;
1312
import org.apache.lucene.search.FilterMatchesIterator;
1413
import org.apache.lucene.search.Matches;
1514
import org.apache.lucene.search.MatchesIterator;
16-
import org.apache.lucene.search.highlight.OffsetLimitTokenFilter;
15+
import org.apache.lucene.search.MatchesUtils;
16+
import org.apache.lucene.search.matchhighlight.MatchRegionRetriever;
1717
import org.apache.lucene.search.matchhighlight.OffsetRange;
1818
import org.apache.lucene.search.matchhighlight.OffsetsFromTokens;
1919
import org.apache.lucene.search.matchhighlight.OffsetsRetrievalStrategy;
2020
import org.apache.lucene.search.matchhighlight.Passage;
2121
import org.apache.lucene.search.matchhighlight.PassageFormatter;
22-
import org.apache.lucene.search.matchhighlight.PassageSelector;
23-
import org.apache.lucene.search.uhighlight.PassageScorer;
2422
import org.elasticsearch.common.lucene.Lucene;
2523
import org.elasticsearch.index.mapper.TextSearchInfo;
2624

2725
import java.io.IOException;
2826
import java.util.ArrayList;
29-
import java.util.Comparator;
3027
import java.util.List;
28+
import java.util.Set;
3129

32-
public class MatchesFieldHighlighter {
30+
/**
31+
* Highlights individual fields using components from lucene's match highlighter
32+
*/
33+
class MatchesFieldHighlighter {
3334

3435
private final FieldHighlightContext context;
3536
private final Matches matches;
3637
private final Analyzer analyzer;
3738
private final String field;
3839

39-
public MatchesFieldHighlighter(FieldHighlightContext context, MatchesHighlighterState state) throws IOException {
40+
MatchesFieldHighlighter(FieldHighlightContext context, MatchesHighlighterState state) throws IOException {
4041
this.context = context;
4142
// TODO term vectors and require_field_match=false should intercept things here
42-
this.matches = state.getMatches(context.query, context.hitContext.docId());
43+
this.matches = state.getMatches(context.query, context.hitContext.readerContext(), context.hitContext.docId());
4344
this.analyzer = context.context.getSearchExecutionContext().getIndexAnalyzer(s -> Lucene.STANDARD_ANALYZER);
4445
this.field = context.fieldType.name();
4546
}
4647

47-
public MatchesIterator getMatchesIterator() throws IOException {
48+
/**
49+
* @return a MatchesIterator for this field, based on the field highlighter configuration
50+
*/
51+
MatchesIterator getMatchesIterator() throws IOException {
4852
if (this.matches == null) {
4953
return null;
5054
}
51-
MatchesIterator it = this.matches.getMatches(field);
52-
if (it == null || context.field.fieldOptions().maxAnalyzedOffset() == null) {
53-
return it;
55+
56+
Set<String> matchFields = context.field.fieldOptions().matchedFields();
57+
if (matchFields == null || matchFields.isEmpty()) {
58+
matchFields = Set.of(field);
5459
}
55-
int positionCutOff = context.field.fieldOptions().maxAnalyzedOffset() / 5;
56-
return new FilterMatchesIterator(it) {
57-
@Override
58-
public boolean next() throws IOException {
59-
if (it.next() == false) {
60-
return false;
61-
}
62-
return it.startPosition() <= positionCutOff;
60+
61+
List<MatchesIterator> fieldIterators = new ArrayList<>();
62+
for (String field : matchFields) {
63+
MatchesIterator it = this.matches.getMatches(field);
64+
if (it != null) {
65+
fieldIterators.add(it);
6366
}
64-
};
67+
}
68+
return MatchesUtils.disjunction(fieldIterators);
6569
}
6670

71+
/**
72+
* Uses a MatchesIterator to highlight a list of source inputs
73+
*/
6774
public List<String> buildHighlights(MatchesIterator it, List<CharSequence> sourceValues) throws IOException {
6875
String contiguousSourceText = buildContiguousSourceText(sourceValues);
6976
OffsetsRetrievalStrategy offsetsStrategy = getOffsetStrategy();
@@ -93,23 +100,31 @@ private OffsetsRetrievalStrategy getOffsetStrategy() {
93100
field,
94101
new XOffsetsFromPositions(field, analyzer)
95102
);
96-
case DOCS_AND_FREQS_AND_POSITIONS -> new XOffsetsFromPositions(field, analyzer);
103+
case DOCS_AND_FREQS_AND_POSITIONS -> limitOffsets(new XOffsetsFromPositions(field, analyzer));
97104
case DOCS_AND_FREQS, DOCS ->
98-
// By default retrieve offsets from individual tokens
99-
// retrieved by the analyzer (possibly narrowed down to
100-
// only those terms that the query hinted at when passed
101-
// a QueryVisitor.
102-
//
103-
// Alternative strategies are also possible and may make sense
104-
// depending on the use case (OffsetsFromValues, for example).
105105
new OffsetsFromTokens(field, analyzer);
106-
case NONE -> (matchesIterator, doc) -> {
107-
throw new IOException(
108-
"Field is indexed without positions and/or offsets: "
109-
+ field
110-
+ ", "
111-
+ tsi.luceneFieldType().indexOptions());
106+
// This should be unreachable because we won't get a MatchesIterator from an unindexed field
107+
case NONE -> (matchesIterator, doc) -> { throw new IllegalStateException("Field [ " + field + "] is not indexed"); };
108+
};
109+
}
110+
111+
// TODO might be more sensible to push this back into OffsetsFromPositions
112+
private OffsetsRetrievalStrategy limitOffsets(OffsetsRetrievalStrategy in) {
113+
if (context.field.fieldOptions().maxAnalyzedOffset() == null) {
114+
return in;
115+
}
116+
return (matchesIterator, doc) -> {
117+
int positionCutOff = context.field.fieldOptions().maxAnalyzedOffset() / 5;
118+
MatchesIterator wrapped = new FilterMatchesIterator(matchesIterator) {
119+
@Override
120+
public boolean next() throws IOException {
121+
if (matchesIterator.next() == false) {
122+
return false;
123+
}
124+
return matchesIterator.startPosition() <= positionCutOff;
125+
}
112126
};
127+
return in.get(wrapped, doc);
113128
};
114129
}
115130

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/MatchesHighlighter.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
import java.io.IOException;
1515
import java.util.List;
1616

17+
/**
18+
* A highlighter that uses the output of a query's Matches to highlight tokens
19+
*/
1720
public class MatchesHighlighter implements Highlighter {
1821

1922
private static final String MATCHES_HIGHLIGHTER_CONFIG_KEY = "matches_highlighter_config_key";
@@ -28,7 +31,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
2831

2932
MatchesHighlighterState state = (MatchesHighlighterState) fieldContext.cache.computeIfAbsent(
3033
MATCHES_HIGHLIGHTER_CONFIG_KEY,
31-
k -> new MatchesHighlighterState(fieldContext)
34+
k -> new MatchesHighlighterState(fieldContext.context.searcher().getIndexReader())
3235
);
3336

3437
MatchesFieldHighlighter fieldHighlighter = new MatchesFieldHighlighter(fieldContext, state);

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/MatchesHighlighterState.java

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
package org.elasticsearch.search.fetch.subphase.highlight;
1010

11+
import org.apache.lucene.index.IndexReader;
12+
import org.apache.lucene.index.LeafReaderContext;
1113
import org.apache.lucene.search.IndexSearcher;
1214
import org.apache.lucene.search.Matches;
1315
import org.apache.lucene.search.MatchesIterator;
@@ -22,7 +24,14 @@
2224
import java.util.Iterator;
2325
import java.util.Map;
2426

25-
public class MatchesHighlighterState {
27+
/**
28+
* Shared state for the matches highlighter
29+
*
30+
* This holds two caches, one for the query's Weight which is global across all documents,
31+
* and one for Matches for each query, which will be cached per document. This avoids having
32+
* to regenerate the weight and matches for each field being highlighted.
33+
*/
34+
class MatchesHighlighterState {
2635

2736
private static final Matches NO_MATCHES = new Matches() {
2837
@Override
@@ -41,22 +50,23 @@ public Iterator<String> iterator() {
4150
}
4251
};
4352

44-
private final FieldHighlightContext context;
4553
private final IndexSearcher searcher;
4654
private final Map<Query, Weight> weightCache = new HashMap<>();
4755
private final Map<Query, Matches> matchesCache = new HashMap<>();
4856

4957
private int currentDoc = -1;
58+
private int currentLeafOrd = -1;
5059

51-
public MatchesHighlighterState(FieldHighlightContext context) {
52-
this.context = context;
53-
this.searcher = context.context.searcher();
60+
MatchesHighlighterState(IndexReader reader) {
61+
this.searcher = new IndexSearcher(reader);
62+
this.searcher.setQueryCache(null); // disable caching
5463
}
5564

56-
public Matches getMatches(Query query, int doc) throws IOException {
57-
if (currentDoc != doc) {
65+
Matches getMatches(Query query, LeafReaderContext ctx, int doc) throws IOException {
66+
if (currentDoc != doc || currentLeafOrd != ctx.ord) {
5867
matchesCache.clear();
5968
currentDoc = doc;
69+
currentLeafOrd = ctx.ord;
6070
}
6171
Weight w = weightCache.get(query);
6272
if (w == null) {
@@ -65,7 +75,7 @@ public Matches getMatches(Query query, int doc) throws IOException {
6575
}
6676
Matches m = matchesCache.get(query);
6777
if (m == null) {
68-
m = w.matches(context.hitContext.readerContext(), doc);
78+
m = w.matches(ctx, doc);
6979
if (m == null) {
7080
m = NO_MATCHES;
7181
}
@@ -76,8 +86,4 @@ public Matches getMatches(Query query, int doc) throws IOException {
7686
}
7787
return m;
7888
}
79-
80-
public MatchesFieldHighlighter getMatchesFieldHighlighter(FieldHighlightContext fieldContext) throws IOException {
81-
return new MatchesFieldHighlighter(fieldContext, this);
82-
}
8389
}

server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/MatchesHighlighterTests.java

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,30 @@ public void testSimpleTermHighlighting() throws IOException {
3838
assertHighlights(highlights, "field", "this is <em>some</em> text");
3939
}
4040

41+
public void testMultipleFieldHighlighting() throws IOException {
42+
MapperService mapperService = createMapperService("""
43+
{ "_doc" : { "properties" : {
44+
"title" : { "type" : "text" },
45+
"description" : { "type" : "text" },
46+
"category" : { "type" : "keyword" }
47+
}}}
48+
""");
49+
50+
ParsedDocument doc = mapperService.documentMapper().parse(source("""
51+
{ "title" : "A tale of two cities",
52+
"description" : "It's a story about two cities",
53+
"category" : [ "fiction", "dickens" ] }
54+
"""));
55+
56+
SearchSourceBuilder search = new SearchSourceBuilder().query(
57+
QueryBuilders.queryStringQuery("dickens OR cities").field("title").field("description").field("category"))
58+
.highlighter(new HighlightBuilder().highlighterType("matches").field("title").field("category"));
59+
60+
Map<String, HighlightField> highlights = highlight(mapperService, doc, search);
61+
assertHighlights(highlights, "title", "A tale of two <em>cities</em>");
62+
assertHighlights(highlights, "category", "<em>dickens</em>");
63+
}
64+
4165
public void testScoring() throws Exception {
4266

4367
MapperService mapperService = createMapperService("""
@@ -105,7 +129,38 @@ public void testAnalyzedOffsetLimit() throws IOException {
105129
);
106130
}
107131

108-
// multiple fields
109-
// analyzed offset limit
110132
// matched_fields - use matches from a set of different fields to highlight this one
133+
public void testMatchedFields() throws IOException {
134+
135+
// note that this doesn't actually use a different analyzer for the subfield,
136+
// given restrictions on analyzers in unit tests
137+
MapperService mapperService = createMapperService("""
138+
{ "_doc" : { "properties" : {
139+
"description" : {
140+
"type" : "text",
141+
"fields" : {
142+
"stemmed" : { "type" : "text" }
143+
}
144+
}
145+
}}}
146+
""");
147+
148+
ParsedDocument doc = mapperService.documentMapper().parse(source("""
149+
{ "description" : "Here is some text" }
150+
"""));
151+
152+
HighlightBuilder highlight = new HighlightBuilder()
153+
.field(new HighlightBuilder.Field("description").matchedFields("description", "description.stemmed"))
154+
.highlighterType("matches");
155+
SearchSourceBuilder search = new SearchSourceBuilder().query(QueryBuilders.termQuery("description.stemmed", "some"))
156+
.highlighter(highlight);
157+
158+
Map<String, HighlightField> highlights = highlight(mapperService, doc, search);
159+
assertHighlights(
160+
highlights,
161+
"description",
162+
"Here is <em>some</em> text"
163+
);
164+
165+
}
111166
}

0 commit comments

Comments
 (0)