 import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DenseVectorFieldType;
 import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper.SparseVectorFieldType;
 import org.elasticsearch.index.query.SearchExecutionContext;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
 import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext;
 import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils;
 import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
 import org.elasticsearch.search.vectors.VectorData;
 import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryWrapper;
+import org.elasticsearch.xpack.inference.mapper.OffsetSourceField;
+import org.elasticsearch.xpack.inference.mapper.OffsetSourceFieldMapper;
 import org.elasticsearch.xpack.inference.mapper.SemanticTextField;
 import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper;
+import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SemanticTextFieldType;
 
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.util.ArrayList;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.function.Function;
+
+import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
 
 /**
  * A {@link Highlighter} designed for the {@link SemanticTextFieldMapper}.
  */
 public class SemanticTextHighlighter implements Highlighter {
     public static final String NAME = "semantic";
 
-    private record OffsetAndScore(int offset, float score) {}
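+    // index: position of the chunk within the document (also the key into the legacy nested chunk _source);
+    // offset: character offsets of the chunk in the original source field (null in the legacy format);
+    // score: the chunk's score for the highlighting query.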
+    private record OffsetAndScore(int index, OffsetSourceFieldMapper.OffsetSource offset, float score) {}
 
     @Override
     public boolean canHighlight(MappedFieldType fieldType) {
-        if (fieldType instanceof SemanticTextFieldMapper.SemanticTextFieldType semanticTextFieldType) {
-            // TODO: Implement highlighting when using inference metadata fields
-            return semanticTextFieldType.useLegacyFormat();
-        }
-        return false;
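+        // Highlighting is now supported for both the legacy format and the inference metadata fields format.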
+        return fieldType instanceof SemanticTextFieldType;
     }
 
     @Override
     public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
-        SemanticTextFieldMapper.SemanticTextFieldType fieldType = (SemanticTextFieldMapper.SemanticTextFieldType) fieldContext.fieldType;
+        if (canHighlight(fieldContext.fieldType) == false) {
+            return null;
+        }
+        SemanticTextFieldType fieldType = (SemanticTextFieldType) fieldContext.fieldType;
         if (fieldType.getEmbeddingsField() == null) {
             // nothing indexed yet
             return null;
@@ -105,28 +115,36 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
         int size = Math.min(chunks.size(), numberOfFragments);
         if (fieldContext.field.fieldOptions().scoreOrdered() == false) {
             chunks = chunks.subList(0, size);
-            chunks.sort(Comparator.comparingInt(c -> c.offset));
+            chunks.sort(Comparator.comparingInt(c -> c.index));
         }
         Text[] snippets = new Text[size];
-        List<Map<?, ?>> nestedSources = XContentMapValues.extractNestedSources(
-            fieldType.getChunksField().fullPath(),
-            fieldContext.hitContext.source().source()
-        );
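+        // Map each matching chunk back to its text: the legacy format stores the chunk text in the nested chunk
+        // _source, while the new format stores character offsets into the original source field.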
+        final Function<OffsetAndScore, String> offsetToContent;
+        if (fieldType.useLegacyFormat()) {
+            List<Map<?, ?>> nestedSources = XContentMapValues.extractNestedSources(
+                fieldType.getChunksField().fullPath(),
+                fieldContext.hitContext.source().source()
+            );
+            offsetToContent = entry -> getContentFromLegacyNestedSources(fieldType.name(), entry, nestedSources);
+        } else {
+            Map<String, String> fieldToContent = new HashMap<>();
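+            // Cache the merged content of each source field so it is loaded at most once per hit.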
+            offsetToContent = entry -> {
+                String content = fieldToContent.computeIfAbsent(entry.offset().field(), key -> {
+                    try {
+                        return extractFieldContent(
+                            fieldContext.context.getSearchExecutionContext(),
+                            fieldContext.hitContext,
+                            entry.offset.field()
+                        );
+                    } catch (IOException e) {
+                        throw new UncheckedIOException("Error extracting field content from field " + entry.offset.field(), e);
+                    }
+                });
+                return content.substring(entry.offset().start(), entry.offset().end());
+            };
+        }
         for (int i = 0; i < size; i++) {
             var chunk = chunks.get(i);
-            if (nestedSources.size() <= chunk.offset) {
-                throw new IllegalStateException(
-                    String.format(
-                        Locale.ROOT,
-                        "Invalid content detected for field [%s]: the chunks size is [%d], "
-                            + "but a reference to offset [%d] was found in the result.",
-                        fieldType.name(),
-                        nestedSources.size(),
-                        chunk.offset
-                    )
-                );
-            }
-            String content = (String) nestedSources.get(chunk.offset).get(SemanticTextField.CHUNKED_TEXT_FIELD);
+            String content = offsetToContent.apply(chunk);
             if (content == null) {
                 throw new IllegalStateException(
                     String.format(
@@ -143,10 +161,43 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
         return new HighlightField(fieldContext.fieldName, snippets);
     }
 
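+    // Loads the value(s) of sourceField for the current hit and merges multi-valued fields into a single string,
+    // the same way the default highlighter does, so the caller can apply chunk offsets to it.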
+    private String extractFieldContent(SearchExecutionContext searchContext, FetchSubPhase.HitContext hitContext, String sourceField)
+        throws IOException {
+        var sourceFieldType = searchContext.getMappingLookup().getFieldType(sourceField);
+        if (sourceFieldType == null) {
+            return null;
+        }
+
+        var values = HighlightUtils.loadFieldValues(sourceFieldType, searchContext, hitContext)
+            .stream()
+            .<Object>map((s) -> DefaultHighlighter.convertFieldValue(sourceFieldType, s))
+            .toList();
+        if (values.size() == 0) {
+            return null;
+        }
+        return DefaultHighlighter.mergeFieldValues(values, MULTIVAL_SEP_CHAR);
+    }
+
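+    // Legacy format only: the chunk ordinal indexes directly into the nested chunk _source entries.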
+    private String getContentFromLegacyNestedSources(String fieldName, OffsetAndScore cand, List<Map<?, ?>> nestedSources) {
+        if (nestedSources.size() <= cand.index) {
+            throw new IllegalStateException(
+                String.format(
+                    Locale.ROOT,
+                    "Invalid content detected for field [%s]: the chunks size is [%d], "
+                        + "but a reference to offset [%d] was found in the result.",
+                    fieldName,
+                    nestedSources.size(),
+                    cand.index
+                )
+            );
+        }
+        return (String) nestedSources.get(cand.index).get(SemanticTextField.CHUNKED_TEXT_FIELD);
+    }
+
     private List<OffsetAndScore> extractOffsetAndScores(
         SearchExecutionContext context,
         LeafReader reader,
-        SemanticTextFieldMapper.SemanticTextFieldType fieldType,
+        SemanticTextFieldType fieldType,
         int docId,
         List<Query> leafQueries
     ) throws IOException {
@@ -164,10 +215,31 @@ private List<OffsetAndScore> extractOffsetAndScores(
         } else if (scorer.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
             return List.of();
         }
+
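+        // With the new format, chunk character offsets are read from the dedicated offset source field rather than
+        // from the nested chunk _source.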
+        OffsetSourceField.OffsetSourceLoader offsetReader = null;
+        if (fieldType.useLegacyFormat() == false) {
+            var terms = reader.terms(fieldType.getOffsetsField().fullPath());
+            if (terms == null) {
+                // The field is empty
+                return List.of();
+            }
+            offsetReader = OffsetSourceField.loader(terms);
+        }
+
         List<OffsetAndScore> results = new ArrayList<>();
-        int offset = 0;
+        int index = 0;
         while (scorer.docID() < docId) {
-            results.add(new OffsetAndScore(offset++, scorer.score()));
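+            // Record one entry per matching chunk: its offsets in the new format, or just its ordinal in the legacy format.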
+            if (offsetReader != null) {
+                var offset = offsetReader.advanceTo(scorer.docID());
+                if (offset == null) {
+                    throw new IllegalStateException(
+                        "Cannot highlight field [" + fieldType.name() + "], missing offsets for doc [" + docId + "]"
+                    );
+                }
+                results.add(new OffsetAndScore(index++, offset, scorer.score()));
+            } else {
+                results.add(new OffsetAndScore(index++, null, scorer.score()));
+            }
             if (scorer.iterator().nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
                 break;
             }