Skip to content

Commit a29153a

Browse files
committed
🔧 (highlighter) Make translation and text field opt-out
1 parent 7ac83c6 commit a29153a

File tree

4 files changed

+9
-11
lines changed

4 files changed

+9
-11
lines changed

openaleph_search/query/base.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -359,13 +359,11 @@ def get_highlight(self) -> dict[str, Any]:
359359
self.HIGHLIGHT_FIELD, text, self.parser.highlight_count
360360
),
361361
Field.NAMES: get_highlighter(Field.NAMES),
362-
Field.TRANSLATION: get_highlighter(Field.TRANSLATION, text),
363362
}
364-
# text field is a copy_to catch-all; it provides a fallback for
365-
# entities whose content field text is not in _source (e.g.
366-
# HyperText where indexText is popped from properties).
367-
if Field.TEXT not in fields:
363+
if settings.highlighter_text_field and Field.TEXT not in fields:
368364
fields[Field.TEXT] = get_highlighter(Field.TEXT, text)
365+
if settings.highlighter_translation_field:
366+
fields[Field.TRANSLATION] = get_highlighter(Field.TRANSLATION, text)
369367
# Add filter value highlights to the main highlight field only.
370368
# Only highlight filter values that are human-readable text.
371369
# Skip short-code groups (countries, languages, etc.) to avoid

openaleph_search/query/highlight.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ def get_highlighter(
8181
highlighter = {
8282
"type": "unified", # Good for mixed content
8383
"fragment_size": 200, # Longer to capture full names/titles
84-
"number_of_fragments": 3,
84+
"number_of_fragments": 1,
8585
"fragmenter": "simple", # Don't break names awkwardly
8686
"pre_tags": [""], # No markup
8787
"post_tags": [""], # No markup
@@ -93,8 +93,7 @@ def get_highlighter(
9393
if field == Field.NAMES:
9494
return {
9595
"type": "plain",
96-
"number_of_fragments": 3,
97-
"max_analyzed_offset": 999999, # probably many names
96+
"number_of_fragments": 1,
9897
"pre_tags": [""], # No markup
9998
"post_tags": [""], # No markup
10099
}

openaleph_search/settings.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,9 @@ class Settings(BaseSettings):
8686
highlighter_phrase_limit: int = 64
8787
highlighter_boundary_max_scan: int = 100
8888
highlighter_no_match_size: int = 300
89-
highlighter_max_analyzed_offset: int = 999999
89+
highlighter_text_field: bool = True
90+
highlighter_translation_field: bool = True
91+
highlighter_max_analyzed_offset: int = 100000
9092

9193
# More Like This defaults
9294
mlt_min_doc_freq: int = 1

tests/test_search_query.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -143,8 +143,7 @@ def test_highlight(self):
143143
},
144144
"names": {
145145
"type": "plain",
146-
"number_of_fragments": 3,
147-
"max_analyzed_offset": 999999,
146+
"number_of_fragments": 1,
148147
"pre_tags": [""],
149148
"post_tags": [""],
150149
},

0 commit comments

Comments
 (0)