Commit d5e93a8

Use storedFieldsSpec to load stored fields for highlighting (#91841)
Since #91269, fetch phase subprocessors can report any stored fields that they need back to the top-level FetchPhase so that all stored fields can be loaded up front. This commit switches the unified and plain highlighters to use this functionality so that highlighting does not need to open stored field readers twice.
Parent: 85c55a9
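
To make the flow concrete, here is a minimal sketch of the load-once pattern that the diffs below implement. It is illustrative only: the class and method names are made up, and the processors, leafContext and docId parameters are placeholders standing in for state that FetchPhase already has.

import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.search.fetch.FetchSubPhaseProcessor;
import org.elasticsearch.search.fetch.StoredFieldsSpec;

import java.io.IOException;
import java.util.List;
import java.util.Map;

// Hypothetical sketch, not code from this commit.
class StoredFieldsLoadOnceSketch {

    Map<String, List<Object>> loadOnce(List<FetchSubPhaseProcessor> processors, LeafReaderContext leafContext, int docId)
        throws IOException {
        // Every fetch sub-phase processor reports the stored fields it needs...
        StoredFieldsSpec spec = StoredFieldsSpec.build(processors, FetchSubPhaseProcessor::storedFieldsSpec);
        // ...a single loader is built from the merged requirements...
        StoredFieldLoader loader = StoredFieldLoader.fromSpec(spec);
        // ...and the fields are read once per hit, then handed to every processor via the HitContext,
        // so the highlighters no longer need to open stored field readers a second time.
        LeafStoredFieldLoader leafLoader = loader.getLoader(leafContext, null);
        leafLoader.advanceTo(docId);
        return leafLoader.storedFields();
    }
}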

File tree

9 files changed: +153 additions, −28 deletions

docs/changelog/91841.yaml
Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+pr: 91841
+summary: Use `storedFieldsSpec` to load stored fields for highlighting
+area: Highlighting
+type: enhancement
+issues: []

modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorHighlightSubFetchPhase.java
Lines changed: 9 additions & 3 deletions

@@ -14,6 +14,8 @@
 import org.apache.lucene.search.QueryVisitor;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.document.DocumentField;
+import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
+import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.fetch.FetchContext;
 import org.elasticsearch.search.fetch.FetchSubPhase;
@@ -85,18 +87,22 @@ public void process(HitContext hit) throws IOException {
                 continue;
             }

+            SearchHighlightContext highlight = new SearchHighlightContext(fetchContext.highlight().fields());
+            FetchSubPhaseProcessor processor = highlightPhase.getProcessor(fetchContext, highlight, query);
+            StoredFieldLoader storedFields = StoredFieldLoader.fromSpec(processor.storedFieldsSpec());
+            LeafStoredFieldLoader leafStoredFields = storedFields.getLoader(percolatorLeafReaderContext, null);
+
             for (Object matchedSlot : field.getValues()) {
                 int slot = (int) matchedSlot;
                 BytesReference document = percolateQuery.getDocuments().get(slot);
+                leafStoredFields.advanceTo(slot);
                 HitContext subContext = new HitContext(
                     new SearchHit(slot, "unknown"),
                     percolatorLeafReaderContext,
                     slot,
-                    Map.of(),
+                    leafStoredFields.storedFields(),
                     Source.fromBytes(document)
                 );
-                SearchHighlightContext highlight = new SearchHighlightContext(fetchContext.highlight().fields());
-                FetchSubPhaseProcessor processor = highlightPhase.getProcessor(fetchContext, highlight, query);
                 processor.process(subContext);
                 for (Map.Entry<String, HighlightField> entry : subContext.hit().getHighlightFields().entrySet()) {
                     if (percolateQuery.getDocuments().size() == 1) {

server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java
Lines changed: 11 additions & 0 deletions

@@ -14,6 +14,7 @@
 import org.elasticsearch.common.CheckedBiConsumer;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
+import org.elasticsearch.search.fetch.StoredFieldsSpec;

 import java.io.IOException;
 import java.util.Collections;
@@ -41,6 +42,16 @@ public abstract class StoredFieldLoader {
      */
     public abstract List<String> fieldsToLoad();

+    /**
+     * Creates a new StoredFieldLoader using a StoredFieldsSpec
+     */
+    public static StoredFieldLoader fromSpec(StoredFieldsSpec spec) {
+        if (spec.noRequirements()) {
+            return StoredFieldLoader.empty();
+        }
+        return create(spec.requiresSource(), spec.requiredStoredFields());
+    }
+
     /**
      * Creates a new StoredFieldLoader
      * @param loadSource should this loader load the _source field
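
For illustration, a small sketch of how the two branches of fromSpec behave; the spec values and the "title" field name are hypothetical, and the comments describe the expected behaviour rather than code from this commit.

import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.search.fetch.StoredFieldsSpec;

import java.util.Set;

// Hypothetical usage sketch of the new factory method.
class FromSpecSketch {

    void demo() {
        // A spec that asks for neither _source nor any stored fields should report noRequirements(),
        // so fromSpec hands back the no-op empty() loader and the stored fields reader is never opened.
        StoredFieldLoader none = StoredFieldLoader.fromSpec(new StoredFieldsSpec(false, false, Set.of()));

        // A spec that needs _source plus a stored field delegates to create(requiresSource, requiredStoredFields)
        // and yields a loader for exactly those fields.
        StoredFieldLoader some = StoredFieldLoader.fromSpec(new StoredFieldsSpec(true, false, Set.of("title")));
    }
}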

server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java
Lines changed: 1 addition & 8 deletions

@@ -104,7 +104,7 @@ private SearchHits buildSearchHits(SearchContext context, Profiler profiler) {
         StoredFieldsSpec storedFieldsSpec = StoredFieldsSpec.build(processors, FetchSubPhaseProcessor::storedFieldsSpec);
         storedFieldsSpec = storedFieldsSpec.merge(new StoredFieldsSpec(false, false, sourceLoader.requiredStoredFields()));

-        StoredFieldLoader storedFieldLoader = profiler.storedFields(buildStoredFieldsLoader(storedFieldsSpec));
+        StoredFieldLoader storedFieldLoader = profiler.storedFields(StoredFieldLoader.fromSpec(storedFieldsSpec));
         boolean requiresSource = storedFieldsSpec.requiresSource();

         NestedDocuments nestedDocuments = context.getSearchExecutionContext().getNestedDocuments();
@@ -168,13 +168,6 @@ protected SearchHit nextDoc(int doc) throws IOException {
         return new SearchHits(hits, totalHits, context.getMaxScore());
     }

-    private static StoredFieldLoader buildStoredFieldsLoader(StoredFieldsSpec spec) {
-        if (spec.noRequirements()) {
-            return StoredFieldLoader.empty();
-        }
-        return StoredFieldLoader.create(spec.requiresSource(), spec.requiredStoredFields());
-    }
-
     List<FetchSubPhaseProcessor> getProcessors(SearchShardTarget target, FetchContext context, Profiler profiler) {
         try {
             List<FetchSubPhaseProcessor> processors = new ArrayList<>();

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java
Lines changed: 1 addition & 4 deletions

@@ -153,10 +153,7 @@ private FieldContext contextBuilders(
                     )
                 );
             }
-            // TODO in future we can load the storedFields in advance here and make use of them,
-            // but for now they are loaded separately in HighlightUtils so we only return whether
-            // or not we need source.
-            storedFieldsSpec = storedFieldsSpec.merge(new StoredFieldsSpec(sourceRequired, false, Set.of()));
+            storedFieldsSpec = storedFieldsSpec.merge(new StoredFieldsSpec(sourceRequired, false, storedFields));
         }
         return new FieldContext(storedFieldsSpec, builders);
     }
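
The practical effect of this one-line change is that each highlighted field's stored-field requirement now travels up through the merged spec instead of being loaded separately later. A rough sketch, assuming merge() unions the requirements of two specs (the field names are hypothetical):

import org.elasticsearch.search.fetch.StoredFieldsSpec;

import java.util.Set;

// Hypothetical sketch of how per-field requirements accumulate.
class SpecMergeSketch {

    void demo() {
        // One highlighted field is stored, so it asks for its own stored field...
        StoredFieldsSpec storedTitle = new StoredFieldsSpec(false, false, Set.of("title"));
        // ...another is not stored, so it falls back to asking for _source.
        StoredFieldsSpec sourceBody = new StoredFieldsSpec(true, false, Set.of());

        // Assuming merge() unions the two, the combined spec asks for _source plus the "title"
        // stored field, and FetchPhase can satisfy both with a single stored-fields pass.
        StoredFieldsSpec combined = storedTitle.merge(sourceBody);
    }
}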

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java
Lines changed: 2 additions & 9 deletions

@@ -10,19 +10,14 @@
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.highlight.Encoder;
 import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
-import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.ValueFetcher;
 import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.search.fetch.FetchSubPhase;

 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
-import java.util.Objects;
-
-import static java.util.Collections.singleton;

 public final class HighlightUtils {

@@ -43,10 +38,8 @@ public static List<Object> loadFieldValues(
         FetchSubPhase.HitContext hitContext
     ) throws IOException {
         if (fieldType.isStored()) {
-            CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(singleton(fieldType.name()), false);
-            hitContext.reader().document(hitContext.docId(), fieldVisitor);
-            List<Object> textsToHighlight = fieldVisitor.fields().get(fieldType.name());
-            return Objects.requireNonNullElse(textsToHighlight, Collections.emptyList());
+            List<Object> values = hitContext.loadedFields().get(fieldType.name());
+            return values == null ? List.of() : values;
         }
         ValueFetcher fetcher = fieldType.valueFetcher(searchContext, null);
         fetcher.setNextReader(hitContext.readerContext());

server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/CustomUnifiedHighlighterTests.java
Lines changed: 57 additions & 0 deletions

@@ -38,4 +38,61 @@ public void testSimpleTermHighlighting() throws IOException {
         assertHighlights(highlights, "field", "this is <em>some</em> text");
     }

+    public void testStoredFieldHighlighting() throws IOException {
+
+        MapperService mapperService = createMapperService("""
+            { "_doc" : { "properties" : {
+                "field" : { "type" : "text", "store" : true }
+            }}}
+            """);
+
+        ParsedDocument doc = mapperService.documentMapper().parse(source("""
+            { "field" : "this is some text" }
+            """));
+
+        SearchSourceBuilder search = new SearchSourceBuilder().query(QueryBuilders.termQuery("field", "some"))
+            .highlighter(new HighlightBuilder().field("field"));
+
+        Map<String, HighlightField> highlights = highlight(mapperService, doc, search);
+        assertHighlights(highlights, "field", "this is <em>some</em> text");
+    }
+
+    public void testStoredCopyToFieldHighlighting() throws IOException {
+        MapperService mapperService = createMapperService("""
+            { "_doc" : { "properties" : {
+                "field_source" : { "type" : "text", "copy_to" : [ "field" ] },
+                "field" : { "type" : "text", "store" : true }
+            }}}
+            """);
+
+        ParsedDocument doc = mapperService.documentMapper().parse(source("""
+            { "field_source" : "this is some text" }
+            """));
+
+        SearchSourceBuilder search = new SearchSourceBuilder().query(QueryBuilders.termQuery("field", "some"))
+            .highlighter(new HighlightBuilder().field("field"));
+
+        Map<String, HighlightField> highlights = highlight(mapperService, doc, search);
+        assertHighlights(highlights, "field", "this is <em>some</em> text");
+    }
+
+    public void testUnstoredCopyToFieldHighlighting() throws IOException {
+        MapperService mapperService = createMapperService("""
+            { "_doc" : { "properties" : {
+                "field_source" : { "type" : "text", "copy_to" : [ "field" ] },
+                "field" : { "type" : "text" }
+            }}}
+            """);
+
+        ParsedDocument doc = mapperService.documentMapper().parse(source("""
+            { "field_source" : "this is some text" }
+            """));
+
+        SearchSourceBuilder search = new SearchSourceBuilder().query(QueryBuilders.termQuery("field", "some"))
+            .highlighter(new HighlightBuilder().field("field"));
+
+        Map<String, HighlightField> highlights = highlight(mapperService, doc, search);
+        assertHighlights(highlights, "field", "this is <em>some</em> text");
+    }
+
 }

test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java
Lines changed: 2 additions & 2 deletions

@@ -236,12 +236,12 @@ protected static IndexSettings createIndexSettings(Version version, Settings set
     protected final void withLuceneIndex(
         MapperService mapperService,
         CheckedConsumer<RandomIndexWriter, IOException> builder,
-        CheckedConsumer<IndexReader, IOException> test
+        CheckedConsumer<DirectoryReader, IOException> test
    ) throws IOException {
         IndexWriterConfig iwc = new IndexWriterConfig(IndexShard.buildIndexAnalyzer(mapperService));
         try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc)) {
             builder.accept(iw);
-            try (IndexReader reader = iw.getReader()) {
+            try (DirectoryReader reader = iw.getReader()) {
                 test.accept(reader);
             }
         }

test/framework/src/main/java/org/elasticsearch/search/fetch/HighlighterTestCase.java
Lines changed: 65 additions & 2 deletions

@@ -8,6 +8,12 @@

 package org.elasticsearch.search.fetch;

+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FilterDirectoryReader;
+import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.StoredFields;
 import org.apache.lucene.search.IndexSearcher;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.mapper.MapperService;
@@ -26,6 +32,7 @@
 import org.elasticsearch.search.lookup.Source;

 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
@@ -58,15 +65,19 @@ protected final Map<String, HighlightField> highlight(MapperService mapperServic
         throws IOException {
         Map<String, HighlightField> highlights = new HashMap<>();
         withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), ir -> {
-            SearchExecutionContext context = createSearchExecutionContext(mapperService, new IndexSearcher(ir));
+            SearchExecutionContext context = createSearchExecutionContext(
+                mapperService,
+                new IndexSearcher(new NoStoredFieldsFilterDirectoryReader(ir))
+            );
             HighlightPhase highlightPhase = new HighlightPhase(getHighlighters());
             FetchSubPhaseProcessor processor = highlightPhase.getProcessor(fetchContext(context, search));
+            Map<String, List<Object>> storedFields = storedFields(processor.storedFieldsSpec(), doc);
             Source source = Source.fromBytes(doc.source());
             FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(
                 new SearchHit(0, "id"),
                 ir.leaves().get(0),
                 0,
-                Map.of(),
+                storedFields,
                 source
             );
             processor.process(hitContext);
@@ -75,6 +86,17 @@ protected final Map<String, HighlightField> highlight(MapperService mapperServic
         return highlights;
     }

+    private static Map<String, List<Object>> storedFields(StoredFieldsSpec spec, ParsedDocument doc) {
+        Map<String, List<Object>> storedFields = new HashMap<>();
+        for (String field : spec.requiredStoredFields()) {
+            List<Object> values = storedFields.computeIfAbsent(field, f -> new ArrayList<>());
+            for (IndexableField f : doc.rootDoc().getFields(field)) {
+                values.add(f.stringValue());
+            }
+        }
+        return storedFields;
+    }
+
     /**
      * Given a set of highlights, assert that any particular field has the expected fragments
      */
@@ -93,4 +115,45 @@ private static FetchContext fetchContext(SearchExecutionContext context, SearchS
         when(fetchContext.sourceLoader()).thenReturn(context.newSourceLoader(false));
         return fetchContext;
     }
+
+    // Wraps a DirectoryReader and ensures that we don't load stored fields from it. For highlighting,
+    // stored field access should all be done by the FetchPhase and the highlighter subphases should
+    // only be retrieving them from the hit context
+    private static class NoStoredFieldsFilterDirectoryReader extends FilterDirectoryReader {
+
+        NoStoredFieldsFilterDirectoryReader(DirectoryReader in) throws IOException {
+            super(in, new SubReaderWrapper() {
+                @Override
+                public LeafReader wrap(LeafReader reader) {
+                    return new FilterLeafReader(reader) {
+
+                        @Override
+                        public StoredFields storedFields() throws IOException {
+                            throw new AssertionError("Called Stored Fields!");
+                        }

+                        @Override
+                        public CacheHelper getCoreCacheHelper() {
+                            return in.getCoreCacheHelper();
+                        }
+
+                        @Override
+                        public CacheHelper getReaderCacheHelper() {
+                            return in.getReaderCacheHelper();
+                        }
+                    };
+                }
+            });
+        }
+
+        @Override
+        protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
+            return new NoStoredFieldsFilterDirectoryReader(in);
+        }
+
+        @Override
+        public CacheHelper getReaderCacheHelper() {
+            return in.getReaderCacheHelper();
+        }
+    }
 }
