Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2faea54
Add ability to set "max_analyzed_offset" implicitly to "index.highlight
svilen-mihaylov-elastic Dec 17, 2024
f26f2c7
Add test
svilen-mihaylov-elastic Jan 2, 2025
b18a696
Tweak tests
svilen-mihaylov-elastic Jan 2, 2025
a6a4623
Merge branch 'main' into svilen-mihaylov/112822
svilen-mihaylov-elastic Jan 2, 2025
f139e85
Fix test and update docs
svilen-mihaylov-elastic Jan 2, 2025
1285b01
Remove logger info
svilen-mihaylov-elastic Jan 2, 2025
eb6725e
Added test for zero
svilen-mihaylov-elastic Jan 2, 2025
4ae0be1
Add a capability
svilen-mihaylov-elastic Jan 3, 2025
c0c2127
Add test for -1
svilen-mihaylov-elastic Jan 3, 2025
c6d2920
Exclude test from REST compatibility
svilen-mihaylov-elastic Jan 3, 2025
703f438
Add Highlight_ to search capability name
svilen-mihaylov-elastic Jan 3, 2025
bf90d9a
Merge branch 'main' into svilen-mihaylov/112822
svilen-mihaylov-elastic Jan 6, 2025
9fc0fcb
Disallow 0
svilen-mihaylov-elastic Jan 6, 2025
6fd85f3
Fix test
svilen-mihaylov-elastic Jan 6, 2025
5abd2c4
Should SUCCEED -> Should FAIL
svilen-mihaylov-elastic Jan 6, 2025
0867480
Oops wrong test
svilen-mihaylov-elastic Jan 6, 2025
f889a61
Rename another test
svilen-mihaylov-elastic Jan 6, 2025
dc54b64
Remove test with =0 for now.
svilen-mihaylov-elastic Jan 6, 2025
909d90f
Restore test with zero
svilen-mihaylov-elastic Jan 6, 2025
7337f1d
Merge branch 'main' into svilen-mihaylov/112822
svilen-mihaylov-elastic Jan 6, 2025
13cda1e
Merge branch 'main' into svilen-mihaylov/112822
svilen-mihaylov-elastic Jan 7, 2025
15bc509
Fix merge
svilen-mihaylov-elastic Jan 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions docs/reference/search/search-your-data/highlighting.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,11 @@ max_analyzed_offset:: By default, the maximum number of characters
analyzed for a highlight request is bounded by the value defined in the
<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> setting,
and when the number of characters exceeds this limit an error is returned. If
this setting is set to a non-negative value, the highlighting stops at this defined
this setting is set to a positive value, the highlighting stops at this defined
maximum limit, and the rest of the text is not processed, thus not highlighted and
no error is returned. The <<max-analyzed-offset, `max_analyzed_offset`>> query setting
no error is returned. If it is specifically set to -1 then the value of
<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> is used instead.
Any other non-positive value (0, or any value less than -1) is rejected with an error. The <<max-analyzed-offset, `max_analyzed_offset`>> query setting
does *not* override the <<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>>
which prevails when it is set to a lower value than the query setting.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
Expand Down Expand Up @@ -52,7 +53,7 @@ protected List<Object> loadFieldValues(
}

@Override
protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
return new AnnotatedHighlighterAnalyzer(super.wrapAnalyzer(analyzer, maxAnalyzedOffset));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
import org.elasticsearch.test.ESTestCase;
Expand Down Expand Up @@ -85,7 +86,7 @@ private void assertHighlightOneDoc(
int noMatchSize,
String[] expectedPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset
Integer queryMaxAnalyzedOffsetIn
) throws Exception {

try (Directory dir = newDirectory()) {
Expand Down Expand Up @@ -116,8 +117,9 @@ private void assertHighlightOneDoc(
for (int i = 0; i < markedUpInputs.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
}
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = QueryMaxAnalyzedOffset.create(queryMaxAnalyzedOffsetIn, maxAnalyzedOffset);
if (queryMaxAnalyzedOffset != null) {
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset.getNotNull());
}
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
hiliteAnalyzer.setAnnotations(annotations);
Expand Down Expand Up @@ -311,6 +313,19 @@ public void testExceedMaxAnalyzedOffset() throws Exception {
e.getMessage()
);

// Same as before, but force using index maxOffset (20) as queryMaxOffset by passing -1.
assertHighlightOneDoc(
"text",
new String[] { "[Long Text exceeds](Long+Text+exceeds) MAX analyzed offset)" },
query,
Locale.ROOT,
breakIterator,
0,
new String[] { "Long Text [exceeds](_hit_term=exceeds) MAX analyzed offset)" },
20,
-1
);

assertHighlightOneDoc(
"text",
new String[] { "[Long Text Exceeds](Long+Text+Exceeds) MAX analyzed offset [Long Text Exceeds](Long+Text+Exceeds)" },
Expand Down
1 change: 1 addition & 0 deletions rest-api-spec/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,5 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
task.skipTest("cat.aliases/10_basic/Deprecated local parameter", "CAT APIs not covered by compatibility policy")
task.skipTest("cat.shards/10_basic/Help", "sync_id is removed in 9.0")
task.skipTest("search/500_date_range/from, to, include_lower, include_upper deprecated", "deprecated parameters are removed in 9.0")
task.skipTest("search.highlight/30_max_analyzed_offset/Plain highlighter with max_analyzed_offset < 0 should FAIL", "semantics of test has changed")
})
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,70 @@ setup:
- match: {hits.hits.0.highlight.field2.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}

---
"Plain highlighter with max_analyzed_offset < 0 should FAIL":
"Plain highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=0 should FAIL":

- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.

- do:
catch: bad_request
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": 0}}
- match: { status: 400 }
- match: { error.root_cause.0.type: "x_content_parse_exception" }
- match: { error.caused_by.type: "illegal_argument_exception" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer, or -1" }

---
"Plain highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=1 should SUCCEED":

- requires:
cluster_features: ["gte_v7.12.0"]
reason: max_analyzed_offset query param added in 7.12.0

- do:
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": 1}}
- match: { hits.hits.0.highlight: null }

---
"Plain highlighter with max_analyzed_offset = -1 default to index analyze offset should SUCCEED":

- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.

- do:
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": -1}}
- match: {hits.hits.0.highlight.field2.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}

---
"Plain highlighter with max_analyzed_offset < -1 should FAIL":

- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.

- do:
catch: bad_request
search:
Expand All @@ -130,4 +188,4 @@ setup:
- match: { status: 400 }
- match: { error.root_cause.0.type: "x_content_parse_exception" }
- match: { error.caused_by.type: "illegal_argument_exception" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer, or -1" }
Original file line number Diff line number Diff line change
Expand Up @@ -2632,6 +2632,41 @@ public void testPostingsHighlighterOrderByScore() throws Exception {
});
}

public void testMaxQueryOffsetDefault() throws Exception {
assertAcked(
prepareCreate("test").setMapping(type1PostingsffsetsMapping())
.setSettings(Settings.builder().put("index.highlight.max_analyzed_offset", "10").build())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Observe index offset is 10

);
ensureGreen();

prepareIndex("test").setSource(
"field1",
new String[] {
"This sentence contains one match, not that short. This sentence contains zero sentence matches. "
+ "This one contains no matches.",
"This is the second value's first sentence. This one contains no matches. "
+ "This sentence contains three sentence occurrences (sentence).",
"One sentence match here and scored lower since the text is quite long, not that appealing. "
+ "This one contains no matches." }
).get();
refresh();

// Specific for this test: by passing "-1" as "maxAnalyzedOffset", the index highlight setting above will be used.
SearchSourceBuilder source = searchSource().query(termQuery("field1", "sentence"))
.highlighter(highlight().field("field1").order("score").maxAnalyzedOffset(-1));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Observe query offset is -1


assertResponse(client().search(new SearchRequest("test").source(source)), response -> {
Map<String, HighlightField> highlightFieldMap = response.getHits().getAt(0).getHighlightFields();
assertThat(highlightFieldMap.size(), equalTo(1));
HighlightField field1 = highlightFieldMap.get("field1");
assertThat(field1.fragments().length, equalTo(1));
assertThat(
field1.fragments()[0].string(),
equalTo("This <em>sentence</em> contains one match, not that short. This sentence contains zero sentence matches.")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Observe only one match

);
});
}

public void testPostingsHighlighterEscapeHtml() throws Exception {
assertAcked(prepareCreate("test").setMapping("title", "type=text," + randomStoreField() + "index_options=offsets"));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private String fieldValue;
private final Integer queryMaxAnalyzedOffset;
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;

CustomFieldHighlighter(
String field,
Expand All @@ -47,7 +47,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator,
int noMatchSize,
Integer queryMaxAnalyzedOffset
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset
) {
super(
field,
Expand Down Expand Up @@ -113,7 +113,7 @@ protected Passage[] getSummaryPassagesNoHighlight(int maxPassages) {
@Override
protected Passage[] highlightOffsetsEnums(OffsetsEnum off) throws IOException {
    // When a per-query max_analyzed_offset was resolved, cap the offsets
    // enumeration so highlighting stops at that bound; otherwise pass through.
    final OffsetsEnum bounded = queryMaxAnalyzedOffset == null
        ? off
        : new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset.getNotNull());
    return super.highlightOffsetsEnums(bounded);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public final class CustomUnifiedHighlighter extends UnifiedHighlighter {
private final int noMatchSize;
private final CustomFieldHighlighter fieldHighlighter;
private final int maxAnalyzedOffset;
private final Integer queryMaxAnalyzedOffset;
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;

/**
* Creates a new instance of {@link CustomUnifiedHighlighter}
Expand Down Expand Up @@ -94,7 +94,7 @@ public CustomUnifiedHighlighter(
int noMatchSize,
int maxPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset,
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset,
boolean requireFieldMatch,
boolean weightMatchesEnabled
) {
Expand Down Expand Up @@ -125,9 +125,9 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier<St
return null;
}
int fieldValueLength = fieldValue.length();
if (((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset > maxAnalyzedOffset)
if ((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset.getNotNull() > maxAnalyzedOffset)
&& (getOffsetSource(field) == OffsetSource.ANALYSIS)
&& (fieldValueLength > maxAnalyzedOffset))) {
&& (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length ["
+ fieldValueLength
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.lucene.search.uhighlight;

/**
 * Holder for the effective per-query {@code max_analyzed_offset} highlighter setting.
 * <p>
 * An instance is only ever created through {@link #create(Integer, int)}, which resolves
 * the user-supplied query value against the index-level maximum: a {@code null} query
 * value yields {@code null} (setting absent), and a negative query value (i.e. {@code -1}
 * after validation upstream) defers to the index-level maximum. A non-null instance
 * therefore always carries a concrete, resolved offset.
 */
public class QueryMaxAnalyzedOffset {
    /** Resolved maximum offset; already substituted with the index maximum when needed. */
    private final int queryMaxAnalyzedOffset;

    private QueryMaxAnalyzedOffset(final int queryMaxAnalyzedOffset) {
        // Plain assignment only — the negative-value substitution happens in create(),
        // not here (previous comment on this constructor was misleading).
        this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset;
    }

    /**
     * Resolves the query-level {@code max_analyzed_offset} against the index-level maximum.
     *
     * @param queryMaxAnalyzedOffset the raw query setting, or {@code null} when not specified
     * @param indexMaxAnalyzedOffset the index-level {@code index.highlight.max_analyzed_offset}
     * @return {@code null} when the query setting is absent; otherwise an instance holding
     *         the query value, or the index maximum when the query value is negative
     */
    public static QueryMaxAnalyzedOffset create(final Integer queryMaxAnalyzedOffset, final int indexMaxAnalyzedOffset) {
        if (queryMaxAnalyzedOffset == null) {
            return null;
        }
        return new QueryMaxAnalyzedOffset(queryMaxAnalyzedOffset < 0 ? indexMaxAnalyzedOffset : queryMaxAnalyzedOffset);
    }

    /**
     * Returns the resolved offset. Callers must have null-checked the instance itself;
     * the contained value is always present.
     */
    public int getNotNull() {
        return queryMaxAnalyzedOffset;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ private SearchCapabilities() {}
private static final String OPTIMIZED_SCALAR_QUANTIZATION_BBQ = "optimized_scalar_quantization_bbq";
private static final String KNN_QUANTIZED_VECTOR_RESCORE = "knn_quantized_vector_rescore";

private static final String HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT = "highlight_max_analyzed_offset_default";

public static final Set<String> CAPABILITIES;
static {
HashSet<String> capabilities = new HashSet<>();
Expand All @@ -58,6 +60,7 @@ private SearchCapabilities() {}
if (Build.current().isSnapshot()) {
capabilities.add(KQL_QUERY_SUPPORTED);
}
capabilities.add(HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT);
CAPABILITIES = Set.copyOf(capabilities);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -561,13 +561,12 @@ public Integer phraseLimit() {
}

/**
* Set to a non-negative value which represents the max offset used to analyze
* the field thus avoiding exceptions if the field exceeds this limit.
* "maxAnalyzedOffset" might be non-negative int, null (unknown), or a negative int (defaulting to index analyzed offset).
*/
@SuppressWarnings("unchecked")
public HB maxAnalyzedOffset(Integer maxAnalyzedOffset) {
if (maxAnalyzedOffset != null && maxAnalyzedOffset <= 0) {
throw new IllegalArgumentException("[" + MAX_ANALYZED_OFFSET_FIELD + "] must be a positive integer");
if (maxAnalyzedOffset != null && (maxAnalyzedOffset < -1 || maxAnalyzedOffset == 0)) {
throw new IllegalArgumentException("[" + MAX_ANALYZED_OFFSET_FIELD + "] must be a positive integer, or -1");
}
this.maxAnalyzedOffset = maxAnalyzedOffset;
return (HB) this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.elasticsearch.lucene.search.uhighlight.BoundedBreakIteratorScanner;
import org.elasticsearch.lucene.search.uhighlight.CustomPassageFormatter;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
Expand Down Expand Up @@ -121,7 +122,10 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
int maxAnalyzedOffset = indexSettings.getHighlightMaxAnalyzedOffset();
boolean weightMatchesEnabled = indexSettings.isWeightMatchesEnabled();
int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset();
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = QueryMaxAnalyzedOffset.create(
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
maxAnalyzedOffset
);
Analyzer analyzer = wrapAnalyzer(
fieldContext.context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER),
queryMaxAnalyzedOffset
Expand Down Expand Up @@ -171,7 +175,7 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
fieldContext.field.fieldOptions().noMatchSize(),
highlighterNumberOfFragments,
maxAnalyzedOffset,
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
queryMaxAnalyzedOffset,
fieldContext.field.fieldOptions().requireFieldMatch(),
weightMatchesEnabled
);
Expand All @@ -186,9 +190,9 @@ protected PassageFormatter getPassageFormatter(SearchHighlightContext.Field fiel
);
}

protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
if (maxAnalyzedOffset != null) {
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset.getNotNull());
}
return analyzer;
}
Expand Down
Loading
Loading