Skip to content

Commit 5a9dc67

Browse files
committed
fix #2592 add unified highlighter support (#2672)
1 parent 7f395cf commit 5a9dc67

File tree

2 files changed

+70
-5
lines changed

2 files changed

+70
-5
lines changed

src/Nest/Search/Search/Highlighting/HighlighterType.cs

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,27 +12,39 @@ public enum HighlighterType
1212
{
1313
/// <summary>
1414
/// Plain Highlighter.
15-
/// The default choice of highlighter is of type plain and uses the Lucene highlighter.
16-
/// It tries hard to reflect the query matching logic in terms of understanding word
15+
/// The default choice of highlighter is of type plain and uses the Lucene highlighter.
16+
/// It tries hard to reflect the query matching logic in terms of understanding word
1717
/// importance and any word positioning criteria in phrase queries.
1818
/// </summary>
1919
[EnumMember(Value = "plain")]
2020
Plain,
2121

2222
/// <summary>
2323
/// Postings Highlighter.
24-
/// If index_options is set to offsets in the mapping the postings highlighter
24+
/// If index_options is set to offsets in the mapping the postings highlighter
2525
/// will be used instead of the plain highlighter
2626
/// </summary>
2727
[EnumMember(Value = "postings")]
2828
Postings,
2929

3030
/// <summary>
3131
/// Fast Vector Highlighter.
32-
/// If term_vector information is provided by setting term_vector to with_positions_offsets
32+
/// If term_vector information is provided by setting term_vector to with_positions_offsets
3333
/// in the mapping then the fast vector highlighter will be used instead of the plain highlighter
3434
/// </summary>
3535
[EnumMember(Value = "fvh")]
36-
Fvh
36+
Fvh,
37+
38+
39+
/// <summary>
40+
/// Unified Highlighter.
41+
/// The unified highlighter can extract offsets from either postings, term vectors, or via re-analyzing text.
42+
/// Under the hood it uses the Lucene UnifiedHighlighter, which picks its strategy depending on the field and the query to highlight.
43+
/// Independently of the strategy, this highlighter breaks the text into sentences and scores individual sentences as if
44+
/// they were documents in this corpus, using the BM25 algorithm. It supports accurate phrase and multi-term
45+
/// (fuzzy, prefix, regex) highlighting.
46+
/// </summary>
47+
[EnumMember(Value = "unified")]
48+
Unified
3749
}
3850
}

src/Tests/Search/Request/HighlightingUsageTests.cs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ public class HighlightingUsageTests : SearchUsageTestBase
2323
{
2424
public HighlightingUsageTests(ReadOnlyCluster cluster, EndpointUsage usage) : base(cluster, usage) { }
2525

26+
public string LastNameSearch { get; } = Project.Projects.First().LeadDeveloper.LastName;
27+
2628
protected override object ExpectJson => new
2729
{
2830
query = new
@@ -72,6 +74,26 @@ public HighlightingUsageTests(ReadOnlyCluster cluster, EndpointUsage usage) : ba
7274
}
7375
}
7476
},
77+
{ "leadDeveloper.lastName", new JObject
78+
{
79+
{ "type", "unified" },
80+
{ "pre_tags", new JArray { "<name>" } },
81+
{ "post_tags", new JArray { "</name>" } },
82+
{ "highlight_query", new JObject
83+
{
84+
{ "match", new JObject
85+
{
86+
{ "leadDeveloper.lastName", new JObject
87+
{
88+
{ "query", LastNameSearch }
89+
}
90+
}
91+
}
92+
}
93+
}
94+
}
95+
}
96+
},
7597
{ "state.offsets", new JObject
7698
{
7799
{ "type", "postings" },
@@ -122,6 +144,17 @@ public HighlightingUsageTests(ReadOnlyCluster cluster, EndpointUsage usage) : ba
122144
.Query("Kurt Edgardo Naomi Dariana Justice Felton")
123145
)
124146
),
147+
fs => fs
148+
.Field(p => p.LeadDeveloper.LastName)
149+
.Type(HighlighterType.Unified)
150+
.PreTags("<name>")
151+
.PostTags("</name>")
152+
.HighlightQuery(q => q
153+
.Match(m => m
154+
.Field(p => p.LeadDeveloper.LastName)
155+
.Query(LastNameSearch)
156+
)
157+
),
125158
fs => fs
126159
.Field(p => p.State.Suffix("offsets"))
127160
.Type(HighlighterType.Postings)
@@ -175,6 +208,18 @@ public HighlightingUsageTests(ReadOnlyCluster cluster, EndpointUsage usage) : ba
175208
}
176209
}
177210
},
211+
{ "leadDeveloper.lastName", new HighlightField
212+
{
213+
Type = HighlighterType.Unified,
214+
PreTags = new[] { "<name>"},
215+
PostTags = new[] { "</name>"},
216+
HighlightQuery = new MatchQuery
217+
{
218+
Field = "leadDeveloper.lastName",
219+
Query = LastNameSearch
220+
}
221+
}
222+
},
178223
{ "state.offsets", new HighlightField
179224
{
180225
Type = HighlighterType.Postings,
@@ -215,6 +260,14 @@ protected override void ExpectResponse(ISearchResponse<Project> response)
215260
highlight.Should().Contain("</name>");
216261
}
217262
}
263+
else if (highlightField.Key == "leadDeveloper.lastName")
264+
{
265+
foreach (var highlight in highlightField.Value.Highlights)
266+
{
267+
highlight.Should().Contain("<name>");
268+
highlight.Should().Contain("</name>");
269+
}
270+
}
218271
else if (highlightField.Key == "state.offsets")
219272
{
220273
foreach (var highlight in highlightField.Value.Highlights)

0 commit comments

Comments
 (0)