@@ -104,6 +104,8 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
104104 ? 1 // we return the best fragment by default
105105 : fieldContext .field .fieldOptions ().numberOfFragments ();
106106
107+ // TODO: fragmentCharSize currently defaults to 100 when the user does not set it. If fragmentSize is
108+ // not set for the semantic highlighter, we probably want to return the chunks unmodified instead.
107109 int fragmentCharSize = fieldContext .field .fieldOptions ().fragmentCharSize ();
108110
109111 List <OffsetAndScore > chunks = extractOffsetAndScores (
@@ -113,7 +115,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
113115 fieldContext .hitContext .docId (),
114116 queries
115117 );
116- if (chunks .size () == 0 ) {
118+ if (chunks .isEmpty () ) {
117119 return null ;
118120 }
119121
@@ -170,30 +172,54 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
170172 }
171173 consumedChunks [i ] = true ;
172174
173- // Chunks smaller than the requested fragmentCharSize will be concatenated with neighboring chunks
174175 if (fragmentCharSize > 0 && content .length () < fragmentCharSize ) {
175- StringBuilder concatenated = new StringBuilder (content );
176-
177- // Look ahead to find more chunks to concatenate
178- // TODO: Lookback to get the preceding chunk if we're at the end of the document
179- for (int nextIndex = i + 1 ; nextIndex < chunks .size () && concatenated .length () < fragmentCharSize ; nextIndex ++) {
180- if (consumedChunks [nextIndex ]) {
181- continue ;
176+ var cur = chunk .offset ();
177+ if (cur != null ) {
178+ int prevIdx = -1 , nextIdx = -1 ;
179+ long bestPrevEnd = Long .MIN_VALUE ; // nearest previous by largest end()
180+ long bestNextStart = Long .MAX_VALUE ; // nearest next by smallest start()
181+
182+ for (int j = 0 ; j < chunks .size (); j ++) {
183+ if (j == i || consumedChunks [j ]) continue ;
184+
185+ var cand = chunks .get (j );
186+ var off = cand .offset ();
187+ if (off == null ) continue ;
188+
189+ if (off .end () <= cur .start ()) {
190+ // candidate is before current
191+ if (off .end () > bestPrevEnd ) {
192+ bestPrevEnd = off .end ();
193+ prevIdx = j ;
194+ }
195+ } else if (off .start () >= cur .end ()) {
196+ // candidate is after current
197+ if (off .start () < bestNextStart ) {
198+ bestNextStart = off .start ();
199+ nextIdx = j ;
200+ }
201+ }
182202 }
183203
184- var nextChunk = chunks .get (nextIndex );
185- if (nextChunk .offset ().start () > chunk .offset ().end ()) {
186- String nextContent = offsetToContent .apply (nextChunk );
187- if (nextContent == null ) {
188- continue ;
204+ double prevScore = (prevIdx != -1 ) ? chunks .get (prevIdx ).score () : Double .NEGATIVE_INFINITY ;
205+ double nextScore = (nextIdx != -1 ) ? chunks .get (nextIdx ).score () : Double .NEGATIVE_INFINITY ;
206+
207+ int pick = (nextScore > prevScore ) ? nextIdx : prevIdx ;
208+ if (pick != -1 ) {
209+ String extra = offsetToContent .apply (chunks .get (pick ));
210+ int remaining = fragmentCharSize - content .length () - CHUNK_DELIMITER .length ();
211+ if (extra != null ) {
212+ if (pick == nextIdx ) {
213+ String toAppend = extra .length () > remaining ? extra .substring (0 , remaining ) : extra ;
214+ content = content + CHUNK_DELIMITER + toAppend ; // expand forward
215+ } else {
216+ String toPrepend = extra .length () > remaining ? extra .substring (extra .length () - remaining ) : extra ;
217+ content = toPrepend + CHUNK_DELIMITER + content ; // expand backward
218+ }
219+ consumedChunks [pick ] = true ;
189220 }
190-
191- concatenated .append (CHUNK_DELIMITER ).append (nextContent );
192- consumedChunks [nextIndex ] = true ;
193221 }
194222 }
195-
196- content = concatenated .toString ();
197223 }
198224
199225 // Truncate content if it exceeds fragmentCharSize
0 commit comments