Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions docs/reference/mapping/types/semantic-text.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -133,14 +133,13 @@ You can extract the most relevant fragments from a semantic text field by using
POST test-index/_search
{
"query": {
"semantic": {
"field": "my_semantic_field"
"match": {
"my_semantic_field": "Which country is Paris in?"
}
},
"highlight": {
"fields": {
"my_semantic_field": {
"type": "semantic",
"number_of_fragments": 2, <1>
"order": "score" <2>
}
Expand All @@ -152,6 +151,33 @@ PUT test-index
<1> Specifies the maximum number of fragments to return.
<2> Sorts highlighted fragments by score when set to `score`. By default, fragments will be output in the order they appear in the field (order: none).

Highlighting is also supported on fields that are not of type `semantic_text`.
However, if you want to restrict highlighting to the semantic highlighter, so that no fragments are returned when the field is not of type `semantic_text`,
you can explicitly set the highlighter type to `semantic` in the request:

[source,console]
------------------------------------------------------------
POST test-index/_search
{
"query": {
"match": {
"my_field": "Which country is Paris in?"
}
},
"highlight": {
"fields": {
"my_field": {
"type": "semantic", <1>
"number_of_fragments": 2,
"order": "score"
}
}
}
}
------------------------------------------------------------
// TEST[skip:Requires inference endpoint]
<1> Ensures that highlighting is applied exclusively to semantic_text fields.

[discrete]
[[custom-indexing]]
==== Customizing `semantic_text` indexing
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.fetch.subphase.FetchFieldsPhase;
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
Expand Down Expand Up @@ -217,6 +218,13 @@ public TimeSeriesParams.MetricType getMetricType() {
return null;
}

/**
* Returns the name of the highlighter type to use for this field when a highlight
* request does not specify a highlighter type explicitly.
* <p>
* This implementation returns {@link DefaultHighlighter#NAME}. The method is not final,
* so a field type can override it to select a different default highlighter.
*
* @return the default highlighter type for this field
*/
public String getDefaultHighlighter() {
return DefaultHighlighter.NAME;
}

/** Generates a query that will only match documents that contain the given value.
* The default implementation returns a {@link TermQuery} over the value bytes
* @throws IllegalArgumentException if {@code value} cannot be converted to the expected data type or if the field is not searchable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -913,7 +913,7 @@ private static Map<String, Highlighter> setupHighlighters(Settings settings, Lis
NamedRegistry<Highlighter> highlighters = new NamedRegistry<>("highlighter");
highlighters.register("fvh", new FastVectorHighlighter(settings));
highlighters.register("plain", new PlainHighlighter());
highlighters.register("unified", new DefaultHighlighter());
highlighters.register(DefaultHighlighter.NAME, new DefaultHighlighter());
highlighters.extractAndRegister(plugins, SearchPlugin::getHighlighters);

return unmodifiableMap(highlighters.getRegistry());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@

public class DefaultHighlighter implements Highlighter {

public static final String NAME = "unified";

@Override
public boolean canHighlight(MappedFieldType fieldType) {
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public void process(HitContext hitContext) throws IOException {
Map<String, Function<HitContext, FieldHighlightContext>> contextBuilders = fieldContext.builders;
for (String field : contextBuilders.keySet()) {
FieldHighlightContext fieldContext = contextBuilders.get(field).apply(hitContext);
Highlighter highlighter = getHighlighter(fieldContext.field);
Highlighter highlighter = getHighlighter(fieldContext.field, fieldContext.fieldType);
HighlightField highlightField = highlighter.highlight(fieldContext);
if (highlightField != null) {
// Note that we make sure to use the original field name in the response. This is because the
Expand All @@ -80,10 +80,10 @@ public void process(HitContext hitContext) throws IOException {
};
}

private Highlighter getHighlighter(SearchHighlightContext.Field field) {
private Highlighter getHighlighter(SearchHighlightContext.Field field, MappedFieldType fieldType) {
String highlighterType = field.fieldOptions().highlighterType();
if (highlighterType == null) {
highlighterType = "unified";
highlighterType = fieldType.getDefaultHighlighter();
}
Highlighter highlighter = highlighters.get(highlighterType);
if (highlighter == null) {
Expand All @@ -103,15 +103,14 @@ private FieldContext contextBuilders(
Map<String, Function<HitContext, FieldHighlightContext>> builders = new LinkedHashMap<>();
StoredFieldsSpec storedFieldsSpec = StoredFieldsSpec.NO_REQUIREMENTS;
for (SearchHighlightContext.Field field : highlightContext.fields()) {
Highlighter highlighter = getHighlighter(field);

Collection<String> fieldNamesToHighlight = context.getSearchExecutionContext().getMatchingFieldNames(field.field());

boolean fieldNameContainsWildcards = field.field().contains("*");
Set<String> storedFields = new HashSet<>();
boolean sourceRequired = false;
for (String fieldName : fieldNamesToHighlight) {
MappedFieldType fieldType = context.getSearchExecutionContext().getFieldType(fieldName);
Highlighter highlighter = getHighlighter(field, fieldType);

// We should prevent highlighting if a field is anything but a text, match_only_text,
// or keyword field.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults;
import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults;
import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder;
import org.elasticsearch.xpack.inference.highlight.SemanticTextHighlighter;

import java.io.IOException;
import java.io.UncheckedIOException;
Expand Down Expand Up @@ -580,6 +581,11 @@ public String familyTypeName() {
return TextFieldMapper.CONTENT_TYPE;
}

/**
* Returns {@link SemanticTextHighlighter#NAME} as the highlighter type for this field,
* replacing the default inherited from the parent field type.
*
* @return the default highlighter type for this field
*/
@Override
public String getDefaultHighlighter() {
return SemanticTextHighlighter.NAME;
}

/**
* Returns the value of this field type's {@code inferenceId} field.
*
* @return the inference id
*/
public String getInferenceId() {
return inferenceId;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ setup:
highlight:
fields:
body:
type: "semantic"
number_of_fragments: 1

- match: { hits.total.value: 1 }
Expand All @@ -101,7 +100,6 @@ setup:
highlight:
fields:
body:
type: "semantic"
number_of_fragments: 2

- match: { hits.total.value: 1 }
Expand All @@ -121,7 +119,6 @@ setup:
highlight:
fields:
body:
type: "semantic"
order: "score"
number_of_fragments: 1

Expand All @@ -141,7 +138,6 @@ setup:
highlight:
fields:
body:
type: "semantic"
order: "score"
number_of_fragments: 2

Expand Down Expand Up @@ -174,7 +170,6 @@ setup:
highlight:
fields:
body:
type: "semantic"
number_of_fragments: 1

- match: { hits.total.value: 1 }
Expand All @@ -193,7 +188,6 @@ setup:
highlight:
fields:
body:
type: "semantic"
number_of_fragments: 2

- match: { hits.total.value: 1 }
Expand All @@ -213,7 +207,6 @@ setup:
highlight:
fields:
body:
type: "semantic"
order: "score"
number_of_fragments: 1

Expand All @@ -233,7 +226,6 @@ setup:
highlight:
fields:
body:
type: "semantic"
order: "score"
number_of_fragments: 2

Expand Down