Skip to content

Commit de46fef

Browse files
committed
Fix char encoding bug for text fields (not semantic_text)
1 parent 35120e6 commit de46fef

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ public static SearchHighlightContext.Field buildFieldHighlightContextForSnippets
4848
SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder();
4949
optionsBuilder.numberOfFragments(numSnippets);
5050
optionsBuilder.fragmentCharSize(snippetCharLength);
51+
// Note: The default SENTENCE boundary scanner used by the DefaultHighlighter can return fragments larger than the specified
52+
// snippetCharLength. This has implications when appending and calculating ByteArrays, so we specify WORD instead.
53+
optionsBuilder.boundaryScannerType(HighlightBuilder.BoundaryScannerType.WORD);
5154
optionsBuilder.noMatchSize(snippetCharLength);
5255
optionsBuilder.preTags(new String[] { "" });
5356
optionsBuilder.postTags(new String[] { "" });

0 commit comments

Comments
 (0)