 package org.apache.lucene.search.matchhighlight;
 
 import java.io.IOException;
-import java.io.UncheckedIOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.function.Predicate;
 import java.util.stream.Stream;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.DocumentStoredFieldVisitor;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TopDocs;
 
 /**
- * An example highlighter that combines several lower-level highlighting utilities in this package
- * into a fully featured, ready-to-use component.
+ * An example highlighter that combines several lower-level utility classes in this package into a
+ * fully featured, ready-to-use component.
  *
- * <p>Note that if you need to customize or tweak the details of highlighting, it is better to
- * assemble your own highlighter using those low-level building blocks, rather than extend or modify
- * this one.
+ * <p>Note: if you need to customize or tweak the details of highlighting, it is better to assemble
+ * your own highlighter using those low-level building blocks, rather than extend or modify this
+ * one.
  */
 public class MatchHighlighter {
   private final IndexSearcher searcher;
@@ -71,16 +66,16 @@ public interface FieldValueHighlighter {
      */
     boolean isApplicable(String field, boolean hasMatches);
 
-    /** Do format field values appropriately. */
+    /** Format field values into a list of final "highlights". */
     List<String> format(
         String field,
-        String[] values,
+        List<String> values,
         String contiguousValue,
         List<OffsetRange> valueRanges,
         List<QueryOffsetRange> matchOffsets);
 
     /**
-     * @return Returns a set of fields that must be fetched for each document, regardless of whether
+     * @return Returns a set of fields that must be loaded from each document, regardless of whether
      *     they had matches or not. This is useful to load and return certain fields that should
      *     always be included (identifiers, document titles, etc.).
      */
@@ -106,7 +101,7 @@ public boolean isApplicable(String field, boolean hasMatches) {
     @Override
     public List<String> format(
         String field,
-        String[] values,
+        List<String> values,
         String contiguousValue,
         List<OffsetRange> valueRanges,
         List<QueryOffsetRange> matchOffsets) {
@@ -169,14 +164,14 @@ public QueryOffsetRange slice(int from, int to) {
 
   private static class DocHit {
     final int docId;
-    private final LeafReader leafReader;
-    private final int leafDocId;
     private final LinkedHashMap<String, List<QueryOffsetRange>> matchRanges = new LinkedHashMap<>();
+    private final LinkedHashMap<String, List<String>> fieldValues = new LinkedHashMap<>();
 
-    DocHit(int docId, LeafReader leafReader, int leafDocId) {
+    DocHit(int docId, MatchRegionRetriever.FieldValueProvider fieldValueProvider) {
       this.docId = docId;
-      this.leafReader = leafReader;
-      this.leafDocId = leafDocId;
+      for (var fieldName : fieldValueProvider) {
+        fieldValues.put(fieldName, fieldValueProvider.getValues(fieldName));
+      }
     }
 
     void addMatches(Query query, Map<String, List<OffsetRange>> hits) {
@@ -187,22 +182,6 @@ void addMatches(Query query, Map<String, List<OffsetRange>> hits) {
             offsets.forEach(o -> target.add(new QueryOffsetRange(query, o.from, o.to)));
           });
     }
-
-    Document document(Predicate<String> needsField) throws IOException {
-      // Only load the fields that have a chance to be highlighted.
-      DocumentStoredFieldVisitor visitor =
-          new DocumentStoredFieldVisitor() {
-            @Override
-            public Status needsField(FieldInfo fieldInfo) {
-              return (matchRanges.containsKey(fieldInfo.name) || needsField.test(fieldInfo.name))
-                  ? Status.YES
-                  : Status.NO;
-            }
-          };
-
-      leafReader.storedFields().document(leafDocId, visitor);
-      return visitor.getDocument();
-    }
   }
 
   public MatchHighlighter(IndexSearcher searcher, Analyzer analyzer) {
@@ -223,25 +202,44 @@ public MatchHighlighter(
 
   public Stream<DocHighlights> highlight(TopDocs topDocs, Query... queries) throws IOException {
     // We want to preserve topDocs document ordering and MatchRegionRetriever is optimized
-    // for streaming, so we'll just prepopulate the map in proper order.
+    // for streaming, so we'll just populate the map in proper order.
     LinkedHashMap<Integer, DocHit> docHits = new LinkedHashMap<>();
     for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
       docHits.put(scoreDoc.doc, null);
     }
 
+    Predicate<String> fieldsToLoadUnconditionally = fieldsAlwaysReturned::contains;
+    Predicate<String> fieldsToLoadIfWithHits =
+        fieldName -> {
+          // We're interested in any fields for which existing highlighters are applicable (with or
+          // without hits).
+          return fieldHighlighters.stream()
+              .anyMatch(
+                  highlighter ->
+                      highlighter.isApplicable(fieldName, true)
+                          || highlighter.isApplicable(fieldName, false));
+        };
+
     // Collect match ranges for each query and associate each range to the origin query.
     for (Query q : queries) {
       MatchRegionRetriever highlighter =
-          new MatchRegionRetriever(searcher, searcher.rewrite(q), offsetsRetrievalStrategies);
+          new MatchRegionRetriever(
+              searcher,
+              searcher.rewrite(q),
+              offsetsRetrievalStrategies,
+              fieldsToLoadUnconditionally,
+              fieldsToLoadIfWithHits);
+
       highlighter.highlightDocuments(
           topDocs,
           (int docId,
               LeafReader leafReader,
               int leafDocId,
+              MatchRegionRetriever.FieldValueProvider fieldValueProvider,
               Map<String, List<OffsetRange>> hits) -> {
             DocHit docHit = docHits.get(docId);
             if (docHit == null) {
-              docHit = new DocHit(docId, leafReader, leafDocId);
+              docHit = new DocHit(docId, fieldValueProvider);
               docHits.put(docId, docHit);
             }
             docHit.addMatches(q, hits);
@@ -254,23 +252,11 @@ public Stream<DocHighlights> highlight(TopDocs topDocs, Query... queries) throws
   }
 
   private DocHighlights computeDocFieldValues(DocHit docHit) {
-    Document doc;
-    try {
-      doc = docHit.document(fieldsAlwaysReturned::contains);
-    } catch (IOException e) {
-      throw new UncheckedIOException(e);
-    }
-
     DocHighlights docHighlights = new DocHighlights(docHit.docId);
 
-    HashSet<String> unique = new HashSet<>();
-    for (IndexableField indexableField : doc) {
-      String field = indexableField.name();
-      if (!unique.add(field)) {
-        continue;
-      }
-
-      String[] values = doc.getValues(field);
+    for (var e : docHit.fieldValues.entrySet()) {
+      String field = e.getKey();
+      List<String> values = e.getValue();
       String contiguousValue = contiguousFieldValue(field, values);
       List<OffsetRange> valueRanges = computeValueRanges(field, values);
       List<QueryOffsetRange> offsets = docHit.matchRanges.get(field);
@@ -287,7 +273,7 @@ private DocHighlights computeDocFieldValues(DocHit docHit) {
     return docHighlights;
   }
 
-  private List<OffsetRange> computeValueRanges(String field, String[] values) {
+  private List<OffsetRange> computeValueRanges(String field, List<String> values) {
     ArrayList<OffsetRange> valueRanges = new ArrayList<>();
     int offset = 0;
     for (CharSequence v : values) {
@@ -298,10 +284,10 @@ private List<OffsetRange> computeValueRanges(String field, String[] values) {
     return valueRanges;
   }
 
-  private String contiguousFieldValue(String field, String[] values) {
+  private String contiguousFieldValue(String field, List<String> values) {
     String value;
-    if (values.length == 1) {
-      value = values[0];
+    if (values.size() == 1) {
+      value = values.get(0);
     } else {
       // TODO: This can be inefficient if offset gap is large but the logic
       // of applying offsets would get much more complicated so leaving for now
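
For readers who want to see how the patched class is meant to be driven, here is a minimal usage sketch. It relies only on the MatchHighlighter(IndexSearcher, Analyzer) constructor and the highlight(TopDocs, Query...) signature visible in this diff; the StandardAnalyzer, the topN value, and the note about registering field highlighters are illustrative assumptions rather than part of this change.

// Minimal usage sketch (not part of this patch). Only the constructor and
// highlight(TopDocs, Query...) are taken from the diff above; the analyzer,
// query handling and topN below are illustrative assumptions.
import java.io.IOException;
import java.util.stream.Stream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.matchhighlight.MatchHighlighter;

class MatchHighlighterUsageSketch {
  static Stream<MatchHighlighter.DocHighlights> highlightTopHits(IndexSearcher searcher, Query query)
      throws IOException {
    // Field-value highlighters would normally be registered on the instance first;
    // that part of the API is not shown in this diff, so it is omitted here.
    MatchHighlighter highlighter = new MatchHighlighter(searcher, new StandardAnalyzer());
    TopDocs topDocs = searcher.search(query, 10);
    // With this patch, highlight() receives per-field stored values through
    // MatchRegionRetriever.FieldValueProvider instead of loading stored documents itself.
    return highlighter.highlight(topDocs, query);
  }
}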