Skip to content

Commit 7ff7b4a

Browse files
committed
HSEARCH-5159 Use char array for highlighters
1 parent 8bd7f23 commit 7ff7b4a

File tree

8 files changed

+78
-19
lines changed

8 files changed

+78
-19
lines changed

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LuceneAbstractSearchHighlighter.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,11 @@ public abstract class LuceneAbstractSearchHighlighter implements SearchHighlight
5959
"<em class=\"hlt10\">"
6060
);
6161
private static final List<String> STYLED_SCHEMA_POST_TAGS = DEFAULT_POST_TAGS;
62+
private static final char[] DEFAULT_BOUNDARY_CHARS = { '.', ',', '!', '?', ' ', '\t', '\n' };
63+
private static final int DEFAULT_MAX_SCAN = SimpleBoundaryScanner.DEFAULT_MAX_SCAN;
6264

6365
protected final Set<String> indexNames;
64-
protected final Character[] boundaryChars;
66+
protected final char[] boundaryChars;
6567
protected final Integer boundaryMaxScan;
6668
protected final Integer fragmentSize;
6769
protected final Integer noMatchSize;
@@ -93,8 +95,8 @@ protected LuceneAbstractSearchHighlighter(Builder builder) {
9395
protected LuceneAbstractSearchHighlighter(BoundaryScannerType scannerType) {
9496
this(
9597
Collections.emptySet(),
96-
SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS,
97-
SimpleBoundaryScanner.DEFAULT_MAX_SCAN,
98+
DEFAULT_BOUNDARY_CHARS,
99+
DEFAULT_MAX_SCAN,
98100
100,
99101
0,
100102
5,
@@ -110,7 +112,7 @@ protected LuceneAbstractSearchHighlighter(BoundaryScannerType scannerType) {
110112
}
111113

112114
protected LuceneAbstractSearchHighlighter(Set<String> indexNames,
113-
Character[] boundaryChars,
115+
char[] boundaryChars,
114116
Integer boundaryMaxScan,
115117
Integer fragmentSize, Integer noMatchSize, Integer numberOfFragments, Boolean orderByScore,
116118
List<String> preTags, List<String> postTags, BoundaryScannerType boundaryScannerType,
@@ -176,7 +178,7 @@ public LuceneAbstractSearchHighlighter withFallback(LuceneAbstractSearchHighligh
176178
}
177179

178180
protected abstract LuceneAbstractSearchHighlighter createHighlighterSameType(Set<String> indexNames,
179-
Character[] boundaryChars,
181+
char[] boundaryChars,
180182
Integer boundaryMaxScan,
181183
Integer fragmentSize, Integer noMatchSize, Integer numberOfFragments, Boolean orderByScore,
182184
List<String> preTags, List<String> postTags, BoundaryScannerType boundaryScannerType,

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LuceneFastVectorSearchHighlighter.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ protected LuceneFastVectorSearchHighlighter(Builder builder) {
5555
}
5656

5757
private LuceneFastVectorSearchHighlighter(Set<String> indexNames,
58-
Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
58+
char[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
5959
Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
6060
List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
6161
HighlighterFragmenter fragmenterType,
@@ -69,7 +69,7 @@ private LuceneFastVectorSearchHighlighter(Set<String> indexNames,
6969

7070
@Override
7171
protected LuceneAbstractSearchHighlighter createHighlighterSameType(Set<String> indexNames,
72-
Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
72+
char[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
7373
Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
7474
List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
7575
HighlighterFragmenter fragmenterType, Integer phraseLimit, Encoder encoder) {
@@ -146,7 +146,7 @@ private final class FastVectorHighlighterValues<A, T> extends HighlighterValues<
146146
private BoundaryScanner boundaryScanner() {
147147
switch ( LuceneFastVectorSearchHighlighter.this.boundaryScannerType ) {
148148
case CHARS:
149-
return new SimpleBoundaryScanner(
149+
return new HibernateSearchSimpleBoundaryScanner(
150150
LuceneFastVectorSearchHighlighter.this.boundaryMaxScan,
151151
LuceneFastVectorSearchHighlighter.this.boundaryChars
152152
);
@@ -237,4 +237,20 @@ public List<FieldFragList.WeightedFragInfo> getWeightedFragInfoList(List<FieldFr
237237
}
238238
}
239239

240+
// TODO: HSEARCH-5160 remove this "custom" class and use the Lucene's constructor for the char[]
241+
private static class HibernateSearchSimpleBoundaryScanner extends SimpleBoundaryScanner {
242+
243+
public HibernateSearchSimpleBoundaryScanner(int boundaryMaxScan, char[] boundaryChars) {
244+
super( boundaryMaxScan, characters( boundaryChars ) );
245+
}
246+
247+
private static Character[] characters(char[] chars) {
248+
Character[] result = new Character[chars.length];
249+
for ( int i = 0; i < chars.length; i++ ) {
250+
result[i] = chars[i];
251+
}
252+
return result;
253+
}
254+
}
255+
240256
}

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LucenePlainSearchHighlighter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ protected LucenePlainSearchHighlighter(Builder builder) {
5656
}
5757

5858
private LucenePlainSearchHighlighter(Set<String> indexNames,
59-
Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
59+
char[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
6060
Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
6161
List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
6262
HighlighterFragmenter fragmenterType, Integer phraseLimit,
@@ -70,7 +70,7 @@ private LucenePlainSearchHighlighter(Set<String> indexNames,
7070

7171
@Override
7272
protected LuceneAbstractSearchHighlighter createHighlighterSameType(Set<String> indexNames,
73-
Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
73+
char[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
7474
Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
7575
List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
7676
HighlighterFragmenter fragmenterType, Integer phraseLimit, Encoder encoder) {

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LuceneUnifiedSearchHighlighter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ protected LuceneUnifiedSearchHighlighter(Builder builder) {
5050
}
5151

5252
private LuceneUnifiedSearchHighlighter(Set<String> indexNames,
53-
Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
53+
char[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
5454
Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
5555
List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
5656
HighlighterFragmenter fragmenterType,
@@ -73,7 +73,7 @@ private LuceneUnifiedSearchHighlighter(Set<String> indexNames,
7373

7474
@Override
7575
protected LuceneAbstractSearchHighlighter createHighlighterSameType(Set<String> indexNames,
76-
Character[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
76+
char[] boundaryChars, Integer boundaryMaxScan, Integer fragmentSize, Integer noMatchSize,
7777
Integer numberOfFragments, Boolean orderByScore, List<String> preTags,
7878
List<String> postTags, BoundaryScannerType boundaryScannerType, Locale boundaryScannerLocale,
7979
HighlighterFragmenter fragmenterType, Integer phraseLimit, Encoder encoder) {

engine/src/main/java/org/hibernate/search/engine/search/highlighter/dsl/HighlighterBoundaryScannerFastVectorHighlighterOptionsStep.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,22 @@ public interface HighlighterBoundaryScannerFastVectorHighlighterOptionsStep<T ex
4848
* each character in the array will be considered as a boundary character.
4949
* @return The next step in a highlighter definition.
5050
*/
51-
HighlighterBoundaryScannerFastVectorHighlighterOptionsStep<T> boundaryChars(Character[] boundaryChars);
51+
default HighlighterBoundaryScannerFastVectorHighlighterOptionsStep<T> boundaryChars(Character[] boundaryChars) {
52+
char[] chars = new char[boundaryChars.length];
53+
for ( int i = 0; i < boundaryChars.length; i++ ) {
54+
chars[i] = boundaryChars[i];
55+
}
56+
return boundaryChars( chars );
57+
}
58+
59+
/**
60+
* Specify a set of characters to look for when scanning for boundaries when
61+
* a {@link HighlighterBoundaryScannerTypeFastVectorHighlighterStep#chars() characters boundary scanner} is used.
62+
*
63+
* @param boundaryChars An array containing all boundary characters. The order doesn't matter:
64+
* each character in the array will be considered as a boundary character.
65+
* @return The next step in a highlighter definition.
66+
*/
67+
HighlighterBoundaryScannerFastVectorHighlighterOptionsStep<T> boundaryChars(char... boundaryChars);
5268

5369
}

engine/src/main/java/org/hibernate/search/engine/search/highlighter/dsl/impl/HighlighterFastVectorHighlighterOptionsStepImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ HighlighterFastVectorHighlighterOptionsStep> boundaryChars(
118118

119119
@Override
120120
public HighlighterBoundaryScannerFastVectorHighlighterOptionsStep<
121-
HighlighterFastVectorHighlighterOptionsStep> boundaryChars(Character[] boundaryChars) {
121+
HighlighterFastVectorHighlighterOptionsStep> boundaryChars(char... boundaryChars) {
122122
HighlighterFastVectorHighlighterOptionsStepImpl.this.highlighterBuilder.boundaryChars( boundaryChars );
123123
return this;
124124
}

engine/src/main/java/org/hibernate/search/engine/search/highlighter/spi/SearchHighlighterBuilder.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
public abstract class SearchHighlighterBuilder {
2121

2222
protected SearchHighlighterType type;
23-
protected Character[] boundaryChars;
23+
protected char[] boundaryChars;
2424
protected Integer boundaryMaxScan;
2525
protected Integer fragmentSize;
2626
protected Integer noMatchSize;
@@ -45,16 +45,16 @@ public SearchHighlighterBuilder boundaryChars(String boundaryChars) {
4545
this.boundaryChars = null;
4646
}
4747
else {
48-
this.boundaryChars = new Character[boundaryChars.length()];
48+
this.boundaryChars = new char[boundaryChars.length()];
4949
for ( int i = 0; i < boundaryChars.length(); i++ ) {
50-
this.boundaryChars[i] = Character.valueOf( boundaryChars.charAt( i ) );
50+
this.boundaryChars[i] = boundaryChars.charAt( i );
5151
}
5252
}
5353

5454
return this;
5555
}
5656

57-
public SearchHighlighterBuilder boundaryChars(Character[] boundaryChars) {
57+
public SearchHighlighterBuilder boundaryChars(char[] boundaryChars) {
5858
this.boundaryChars = boundaryChars;
5959
return this;
6060
}
@@ -159,7 +159,7 @@ public SearchHighlighterType type() {
159159
return type;
160160
}
161161

162-
public Character[] boundaryChars() {
162+
public char[] boundaryChars() {
163163
return boundaryChars;
164164
}
165165

integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/highlight/HighlighterFastVectorIT.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,31 @@ void boundaryCharacters() {
276276
);
277277
}
278278

279+
@Test
280+
void boundaryCharactersAsCharArray() {
281+
StubMappingScope scope = index.createScope();
282+
283+
SearchQuery<List<String>> highlights = scope.query().select(
284+
f -> f.highlight( "string" )
285+
)
286+
.where( f -> f.match().field( "string" ).matching( "useless" ) )
287+
.highlighter( h2 -> h2.fastVector()
288+
.fragmentSize( 20 )
289+
.boundaryScanner()
290+
.chars()
291+
.locale( Locale.ENGLISH )
292+
.boundaryChars( '-' )
293+
.boundaryMaxScan( 25 )
294+
.end()
295+
)
296+
.toQuery();
297+
298+
assertThatHits( highlights.fetchAllHits() )
299+
.hasHitsAnyOrder(
300+
Arrays.asList( " to some <em>useless</em> text in between time to see " )
301+
);
302+
}
303+
279304
@Test
280305
void boundaryCharactersAsArray() {
281306
StubMappingScope scope = index.createScope();

0 commit comments

Comments
 (0)