Skip to content

Commit b06bcb1

Browse files
committed
🔧 (highlighting) Disable FVH per default
1 parent a29153a commit b06bcb1

File tree

2 files changed

+21
-10
lines changed

2 files changed

+21
-10
lines changed

openaleph_search/settings.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ class Settings(BaseSettings):
8080
content_term_vectors: bool = True
8181

8282
# Highlighter configuration
83-
highlighter_fvh_enabled: bool = True
83+
highlighter_fvh_enabled: bool = False
8484
highlighter_fragment_size: int = 200
8585
highlighter_number_of_fragments: int = 3
8686
highlighter_phrase_limit: int = 64

tests/test_highlighting.py

Lines changed: 20 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -25,22 +25,26 @@ def _search_highlight(
2525
def test_highlighting_phrases(index_entities):
2626
# this is using the sample entity 242d6724b38425f11df37437c38125b71fb13300
2727
highlight = _search_highlight('"Mr. Trump proclaimed"')
28-
assert "<em>Mr. Trump proclaimed</em>" in highlight
28+
# Unified highlighter highlights individual terms rather than full phrase spans
29+
assert "<em>Mr</em>" in highlight
30+
assert "<em>Trump</em>" in highlight
31+
assert "<em>proclaimed</em>" in highlight
2932

3033
highlight = _search_highlight('"former chairman"~2')
3134
assert "<em>former</em>" in highlight
3235
assert "<em>chairman</em>" in highlight
3336

3437
highlight = _search_highlight('"paul manafort"')
3538
assert highlight is not None
36-
assert "<em>Paul Manafort</em>" in highlight
39+
assert "<em>Paul</em>" in highlight
40+
assert "<em>Manafort</em>" in highlight
3741

3842
highlight = _search_highlight("Українська")
3943
assert highlight is not None
4044
assert "<em>Українська" in highlight
4145
highlight = _search_highlight('"日本語"')
4246
assert highlight is not None
43-
assert "<em>本</em><em>語" in highlight
47+
assert "<em>日本語" in highlight
4448

4549

4650
def test_highlighting_pages(fixture_pages, cleanup_after):
@@ -49,11 +53,14 @@ def test_highlighting_pages(fixture_pages, cleanup_after):
4953
index_bulk("test_pages_highlight", fixture_pages, sync=True)
5054

5155
# Search in the indexText (-> content) of a Pages (aka Document) entity
56+
# Unified highlighter highlights individual terms rather than full phrase spans
5257
highlight = _search_highlight(
5358
'"MIT license" "useful information" documentation', schema="Pages"
5459
)
55-
assert "<em>MIT license</em>" in highlight
56-
assert "<em>useful information</em>" in highlight
60+
assert "<em>MIT</em>" in highlight
61+
assert "<em>license</em>" in highlight
62+
assert "<em>useful</em>" in highlight
63+
assert "<em>information</em>" in highlight
5764
assert "<em>documentation</em>" in highlight
5865

5966
# Search within its child page entities (used in OpenAleph ui)
@@ -62,24 +69,28 @@ def test_highlighting_pages(fixture_pages, cleanup_after):
6269
schema="Page",
6370
parent_id="f61295777cf69f423855655f1614794ce22086d8.b154e50f50c8c8133168767d78bbd1dff067f308",
6471
)
65-
assert "<em>MIT license</em>" in highlight
66-
assert "<em>useful information</em>" in highlight
72+
assert "<em>MIT</em>" in highlight
73+
assert "<em>license</em>" in highlight
74+
assert "<em>useful</em>" in highlight
75+
assert "<em>information</em>" in highlight
6776
assert "<em>documentation</em>" in highlight
6877

6978
# Include mentioned names
7079
highlight = _search_highlight(
7180
'names:"massachusetts institute of technology" "MIT license"', schema="Pages"
7281
)
7382
assert "massachusetts institute of technology" in highlight
74-
assert "<em>MIT license</em>" in highlight
83+
assert "<em>MIT</em>" in highlight
84+
assert "<em>license</em>" in highlight
7585

7686
# Page doesn't contain "names" but still the highlight works for the text phrase
7787
highlight = _search_highlight(
7888
'names:"massachusetts institute of technology" "MIT license"',
7989
schema="Page",
8090
parent_id="f61295777cf69f423855655f1614794ce22086d8.b154e50f50c8c8133168767d78bbd1dff067f308",
8191
)
82-
assert "<em>MIT license</em>" in highlight
92+
assert "<em>MIT</em>" in highlight
93+
assert "<em>license</em>" in highlight
8394

8495

8596
def test_highlighting_translation_plaintext(cleanup_after):

0 commit comments

Comments
 (0)