Skip to content

Commit 88eb687

Browse files
Add ability to set "max_analyzed_offset" implicitly to "index.highlight.max_analyzed_offset" (#118895) (#121323)
Add ability to set "max_analyzed_offset" implicitly to "index.highlight.max_analyzed_offset", by setting it explicitly to "-1". Closes #112822 (cherry picked from commit 93c349c)
1 parent a448623 commit 88eb687

File tree

15 files changed

+182
-29
lines changed

15 files changed

+182
-29
lines changed

docs/reference/search/search-your-data/highlighting.asciidoc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,11 @@ max_analyzed_offset:: By default, the maximum number of characters
276276
analyzed for a highlight request is bounded by the value defined in the
277277
<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> setting,
278278
and when the number of characters exceeds this limit an error is returned. If
279-
this setting is set to a non-negative value, the highlighting stops at this defined
279+
this setting is set to a positive value, the highlighting stops at this defined
280280
maximum limit, and the rest of the text is not processed, thus not highlighted and
281-
no error is returned. The <<max-analyzed-offset, `max_analyzed_offset`>> query setting
281+
no error is returned. If it is specifically set to -1 then the value of
282+
<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> is used instead.
283+
If it is set to 0 or to a value lower than -1, an error is returned. The <<max-analyzed-offset, `max_analyzed_offset`>> query setting
282284
does *not* override the <<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>>
283285
which prevails when it's set to lower value than the query setting.
284286

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
1818
import org.elasticsearch.index.query.SearchExecutionContext;
1919
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
20+
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
2021
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
2122
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
2223
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
@@ -52,7 +53,7 @@ protected List<Object> loadFieldValues(
5253
}
5354

5455
@Override
55-
protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
56+
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
5657
return new AnnotatedHighlighterAnalyzer(super.wrapAnalyzer(analyzer, maxAnalyzedOffset));
5758
}
5859

plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
4040
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
4141
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
42+
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
4243
import org.elasticsearch.lucene.search.uhighlight.Snippet;
4344
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
4445
import org.elasticsearch.test.ESTestCase;
@@ -85,7 +86,7 @@ private void assertHighlightOneDoc(
8586
int noMatchSize,
8687
String[] expectedPassages,
8788
int maxAnalyzedOffset,
88-
Integer queryMaxAnalyzedOffset
89+
Integer queryMaxAnalyzedOffsetIn
8990
) throws Exception {
9091

9192
try (Directory dir = newDirectory()) {
@@ -116,8 +117,9 @@ private void assertHighlightOneDoc(
116117
for (int i = 0; i < markedUpInputs.length; i++) {
117118
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
118119
}
120+
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = QueryMaxAnalyzedOffset.create(queryMaxAnalyzedOffsetIn, maxAnalyzedOffset);
119121
if (queryMaxAnalyzedOffset != null) {
120-
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
122+
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset.getNotNull());
121123
}
122124
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
123125
hiliteAnalyzer.setAnnotations(annotations);
@@ -311,6 +313,19 @@ public void testExceedMaxAnalyzedOffset() throws Exception {
311313
e.getMessage()
312314
);
313315

316+
// Same as before, but force using index maxOffset (20) as queryMaxOffset by passing -1.
317+
assertHighlightOneDoc(
318+
"text",
319+
new String[] { "[Long Text exceeds](Long+Text+exceeds) MAX analyzed offset)" },
320+
query,
321+
Locale.ROOT,
322+
breakIterator,
323+
0,
324+
new String[] { "Long Text [exceeds](_hit_term=exceeds) MAX analyzed offset)" },
325+
20,
326+
-1
327+
);
328+
314329
assertHighlightOneDoc(
315330
"text",
316331
new String[] { "[Long Text Exceeds](Long+Text+Exceeds) MAX analyzed offset [Long Text Exceeds](Long+Text+Exceeds)" },

rest-api-spec/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ tasks.named("yamlRestTestV7CompatTransform").configure({ task ->
254254
task.skipTest("logsdb/20_source_mapping/stored _source mode is supported", "no longer serialize source_mode")
255255
task.skipTest("logsdb/20_source_mapping/include/exclude is supported with stored _source", "no longer serialize source_mode")
256256
task.skipTest("logsdb/20_source_mapping/synthetic _source is default", "no longer serialize source_mode")
257+
task.skipTest("search.highlight/30_max_analyzed_offset/Plain highlighter with max_analyzed_offset < 0 should FAIL", "semantics of test has changed")
257258
task.skipTest("search/520_fetch_fields/fetch _seq_no via fields", "error code is changed from 5xx to 400 in 9.0")
258259
task.skipTest("search.vectors/41_knn_search_bbq_hnsw/Test knn search", "Scoring has changed in latest versions")
259260
task.skipTest("search.vectors/42_knn_search_bbq_flat/Test knn search", "Scoring has changed in latest versions")

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/30_max_analyzed_offset.yml

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,12 +115,70 @@ setup:
115115
- match: {hits.hits.0.highlight.field2.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}
116116

117117
---
118-
"Plain highlighter with max_analyzed_offset < 0 should FAIL":
118+
"Plain highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=0 should FAIL":
119+
120+
- requires:
121+
test_runner_features: [capabilities]
122+
capabilities:
123+
- method: GET
124+
path: /_search
125+
capabilities: [ highlight_max_analyzed_offset_default ]
126+
reason: Behavior of max_analyzed_offset query param changed in 8.18.
127+
128+
- do:
129+
catch: bad_request
130+
search:
131+
rest_total_hits_as_int: true
132+
index: test1
133+
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": 0}}
134+
- match: { status: 400 }
135+
- match: { error.root_cause.0.type: "x_content_parse_exception" }
136+
- match: { error.caused_by.type: "illegal_argument_exception" }
137+
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer, or -1" }
138+
139+
---
140+
"Plain highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=1 should SUCCEED":
119141

120142
- requires:
121143
cluster_features: ["gte_v7.12.0"]
122144
reason: max_analyzed_offset query param added in 7.12.0
123145

146+
- do:
147+
search:
148+
rest_total_hits_as_int: true
149+
index: test1
150+
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": 1}}
151+
- match: { hits.hits.0.highlight: null }
152+
153+
---
154+
"Plain highlighter with max_analyzed_offset = -1 default to index analyze offset should SUCCEED":
155+
156+
- requires:
157+
test_runner_features: [capabilities]
158+
capabilities:
159+
- method: GET
160+
path: /_search
161+
capabilities: [ highlight_max_analyzed_offset_default ]
162+
reason: Behavior of max_analyzed_offset query param changed in 8.18.
163+
164+
- do:
165+
search:
166+
rest_total_hits_as_int: true
167+
index: test1
168+
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": -1}}
169+
- match: {hits.hits.0.highlight.field2.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}
170+
171+
---
172+
"Plain highlighter with max_analyzed_offset < -1 should FAIL":
173+
174+
- requires:
175+
test_runner_features: [capabilities]
176+
capabilities:
177+
- method: GET
178+
path: /_search
179+
capabilities: [ highlight_max_analyzed_offset_default ]
180+
reason: Behavior of max_analyzed_offset query param changed in 8.18.
181+
124182
- do:
125183
catch: bad_request
126184
search:
@@ -130,4 +188,4 @@ setup:
130188
- match: { status: 400 }
131189
- match: { error.root_cause.0.type: "x_content_parse_exception" }
132190
- match: { error.caused_by.type: "illegal_argument_exception" }
133-
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer" }
191+
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer, or -1" }

server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2674,6 +2674,41 @@ public void testPostingsHighlighterOrderByScore() throws Exception {
26742674
});
26752675
}
26762676

2677+
/**
 * Checks highlighting when the query-level {@code max_analyzed_offset} is given as {@code -1}.
 * The test creates index "test" with {@code index.highlight.max_analyzed_offset} set to 10,
 * indexes one document with three {@code field1} values, then runs a term query for "sentence"
 * with a score-ordered highlighter on {@code field1} and {@code maxAnalyzedOffset(-1)}.
 * It asserts that the first hit has exactly one highlight field with exactly one fragment,
 * and that the fragment text equals the expected string.
 */
public void testMaxQueryOffsetDefault() throws Exception {
2678+
assertAcked(
2679+
prepareCreate("test").setMapping(type1PostingsffsetsMapping())
2680+
.setSettings(Settings.builder().put("index.highlight.max_analyzed_offset", "10").build())
2681+
);
2682+
ensureGreen();
2683+
2684+
prepareIndex("test").setSource(
2685+
"field1",
2686+
new String[] {
2687+
"This sentence contains one match, not that short. This sentence contains zero sentence matches. "
2688+
+ "This one contains no matches.",
2689+
"This is the second value's first sentence. This one contains no matches. "
2690+
+ "This sentence contains three sentence occurrences (sentence).",
2691+
"One sentence match here and scored lower since the text is quite long, not that appealing. "
2692+
+ "This one contains no matches." }
2693+
).get();
2694+
refresh();
2695+
2696+
// Specific for this test: by passing "-1" as "maxAnalyzedOffset", the index highlight setting above will be used.
2697+
SearchSourceBuilder source = searchSource().query(termQuery("field1", "sentence"))
2698+
.highlighter(highlight().field("field1").order("score").maxAnalyzedOffset(-1));
2699+
2700+
assertResponse(client().search(new SearchRequest("test").source(source)), response -> {
2701+
Map<String, HighlightField> highlightFieldMap = response.getHits().getAt(0).getHighlightFields();
2702+
assertThat(highlightFieldMap.size(), equalTo(1));
2703+
HighlightField field1 = highlightFieldMap.get("field1");
2704+
assertThat(field1.fragments().length, equalTo(1));
2705+
assertThat(
2706+
field1.fragments()[0].string(),
2707+
equalTo("This <em>sentence</em> contains one match, not that short. This sentence contains zero sentence matches.")
2708+
);
2709+
});
2710+
}
2711+
26772712
public void testPostingsHighlighterEscapeHtml() throws Exception {
26782713
assertAcked(prepareCreate("test").setMapping("title", "type=text," + randomStoreField() + "index_options=offsets"));
26792714

server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
3434
private final Locale breakIteratorLocale;
3535
private final int noMatchSize;
3636
private String fieldValue;
37-
private final Integer queryMaxAnalyzedOffset;
37+
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;
3838

3939
CustomFieldHighlighter(
4040
String field,
@@ -47,7 +47,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
4747
PassageFormatter passageFormatter,
4848
Comparator<Passage> passageSortComparator,
4949
int noMatchSize,
50-
Integer queryMaxAnalyzedOffset
50+
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset
5151
) {
5252
super(
5353
field,
@@ -113,7 +113,7 @@ protected Passage[] getSummaryPassagesNoHighlight(int maxPassages) {
113113
@Override
114114
protected Passage[] highlightOffsetsEnums(OffsetsEnum off) throws IOException {
115115
if (queryMaxAnalyzedOffset != null) {
116-
off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset);
116+
off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset.getNotNull());
117117
}
118118
return super.highlightOffsetsEnums(off);
119119
}

server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public final class CustomUnifiedHighlighter extends UnifiedHighlighter {
6666
private final int noMatchSize;
6767
private final CustomFieldHighlighter fieldHighlighter;
6868
private final int maxAnalyzedOffset;
69-
private final Integer queryMaxAnalyzedOffset;
69+
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;
7070

7171
/**
7272
* Creates a new instance of {@link CustomUnifiedHighlighter}
@@ -94,7 +94,7 @@ public CustomUnifiedHighlighter(
9494
int noMatchSize,
9595
int maxPassages,
9696
int maxAnalyzedOffset,
97-
Integer queryMaxAnalyzedOffset,
97+
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset,
9898
boolean requireFieldMatch,
9999
boolean weightMatchesEnabled
100100
) {
@@ -125,9 +125,9 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier<St
125125
return null;
126126
}
127127
int fieldValueLength = fieldValue.length();
128-
if (((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset > maxAnalyzedOffset)
128+
if ((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset.getNotNull() > maxAnalyzedOffset)
129129
&& (getOffsetSource(field) == OffsetSource.ANALYSIS)
130-
&& (fieldValueLength > maxAnalyzedOffset))) {
130+
&& (fieldValueLength > maxAnalyzedOffset)) {
131131
throw new IllegalArgumentException(
132132
"The length ["
133133
+ fieldValueLength
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.lucene.search.uhighlight;
11+
12+
/**
 * Holds the resolved query-level "max analyzed offset" as a primitive {@code int}.
 * Instances are obtained through {@link #create(Integer, int)}, which returns {@code null}
 * when no query-level value was supplied; callers therefore null-check the reference
 * before calling {@link #getNotNull()}.
 */
public class QueryMaxAnalyzedOffset {
13+
// The already-resolved limit; see create() for how negative inputs are mapped.
private final int queryMaxAnalyzedOffset;
14+
15+
private QueryMaxAnalyzedOffset(final int queryMaxAnalyzedOffset) {
16+
// Stores the value as given; substitution of negative values by the index limit happens in create().
17+
this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset;
18+
}
19+
20+
/**
 * Builds the holder from the raw query-level value.
 *
 * @param queryMaxAnalyzedOffset the value given on the query, or {@code null} if none was given
 * @param indexMaxAnalyzedOffset the index-level limit, used in place of a negative query value
 * @return {@code null} when {@code queryMaxAnalyzedOffset} is {@code null}; otherwise a holder
 *         containing {@code indexMaxAnalyzedOffset} if the query value is negative, or the
 *         query value itself if it is zero or positive
 */
public static QueryMaxAnalyzedOffset create(final Integer queryMaxAnalyzedOffset, final int indexMaxAnalyzedOffset) {
21+
if (queryMaxAnalyzedOffset == null) {
22+
return null;
23+
}
24+
// Any negative value (not only -1) is replaced by the index-level limit here.
// NOTE(review): rejection of values below -1 presumably happens before this point - confirm against the caller.
return new QueryMaxAnalyzedOffset(queryMaxAnalyzedOffset < 0 ? indexMaxAnalyzedOffset : queryMaxAnalyzedOffset);
25+
}
26+
27+
/**
 * Returns the resolved limit stored in this holder.
 *
 * @return the value chosen by {@link #create(Integer, int)}
 */
public int getNotNull() {
28+
return queryMaxAnalyzedOffset;
29+
}
30+
}

server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ private SearchCapabilities() {}
4343
private static final String OPTIMIZED_SCALAR_QUANTIZATION_BBQ = "optimized_scalar_quantization_bbq";
4444
private static final String KNN_QUANTIZED_VECTOR_RESCORE_OVERSAMPLE = "knn_quantized_vector_rescore_oversample";
4545

46+
private static final String HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT = "highlight_max_analyzed_offset_default";
47+
4648
public static final Set<String> CAPABILITIES;
4749
static {
4850
HashSet<String> capabilities = new HashSet<>();
@@ -58,6 +60,7 @@ private SearchCapabilities() {}
5860
capabilities.add(K_DEFAULT_TO_SIZE);
5961
capabilities.add(KQL_QUERY_SUPPORTED);
6062
capabilities.add(RRF_WINDOW_SIZE_SUPPORT_DEPRECATED);
63+
capabilities.add(HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT);
6164
CAPABILITIES = Set.copyOf(capabilities);
6265
}
6366
}

0 commit comments

Comments
 (0)