diff --git a/docs/reference/mapping/types/semantic-text.asciidoc b/docs/reference/mapping/types/semantic-text.asciidoc index 62e5075b9287d..c5cc24f957a44 100644 --- a/docs/reference/mapping/types/semantic-text.asciidoc +++ b/docs/reference/mapping/types/semantic-text.asciidoc @@ -133,14 +133,13 @@ You can extract the most relevant fragments from a semantic text field by using POST test-index/_search { "query": { - "semantic": { - "field": "my_semantic_field" + "match": { + "my_semantic_field": "Which country is Paris in?" } }, "highlight": { "fields": { "my_semantic_field": { - "type": "semantic", "number_of_fragments": 2, <1> "order": "score" <2> } @@ -152,6 +151,33 @@ POST test-index/_search <1> Specifies the maximum number of fragments to return. <2> Sorts highlighted fragments by score when set to `score`. By default, fragments will be output in the order they appear in the field (order: none). +Highlighting is supported on fields other than `semantic_text`. +However, if you want to restrict highlighting to the semantic highlighter and return no fragments when the field is not of type `semantic_text`, +you can explicitly enforce the `semantic` highlighter in the query: + +[source,console] +------------------------------------------------------------ +POST test-index/_search +{ + "query": { + "match": { + "my_field": "Which country is Paris in?" + } + }, + "highlight": { + "fields": { + "my_field": { + "type": "semantic", <1> + "number_of_fragments": 2, + "order": "score" + } + } + } +} +------------------------------------------------------------ +// TEST[skip:Requires inference endpoint] +<1> Ensures that highlighting is applied exclusively to `semantic_text` fields. 
+ [discrete] [[custom-indexing]] ==== Customizing `semantic_text` indexing diff --git a/docs/reference/search/search-your-data/highlighting.asciidoc b/docs/reference/search/search-your-data/highlighting.asciidoc index 63d9c632bffcf..bc81be389cf9c 100644 --- a/docs/reference/search/search-your-data/highlighting.asciidoc +++ b/docs/reference/search/search-your-data/highlighting.asciidoc @@ -37,8 +37,8 @@ GET /_search // TEST[setup:my_index] {es} supports three highlighters: `unified`, `plain`, and `fvh` (fast vector -highlighter). You can specify the highlighter `type` you want to use -for each field. +highlighter) for `text` and `keyword` fields and the `semantic` highlighter for `semantic_text` fields. +You can specify the highlighter `type` you want to use for each field or rely on the field type's default highlighter. [discrete] [[unified-highlighter]] @@ -48,7 +48,19 @@ highlighter breaks the text into sentences and uses the BM25 algorithm to score individual sentences as if they were documents in the corpus. It also supports accurate phrase and multi-term (fuzzy, prefix, regex) highlighting. The `unified` highlighter can combine matches from multiple fields into one result (see -`matched_fields`). This is the default highlighter. +`matched_fields`). + +This is the default highlighter for all `text` and `keyword` fields. + +[discrete] +[[semantic-highlighter]] +==== Semantic Highlighter + +The `semantic` highlighter is specifically designed for use with the <<semantic-text,`semantic_text`>> field. +It identifies and extracts the most relevant fragments from the field based on semantic +similarity between the query and each fragment. + +By default, <<semantic-text,`semantic_text`>> fields use the semantic highlighter. 
[discrete] [[plain-highlighter]] diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 4b68e20673572..d65a8cbd8411b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -41,6 +41,7 @@ import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.fetch.subphase.FetchFieldsPhase; +import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; import org.elasticsearch.search.lookup.SearchLookup; import java.io.IOException; @@ -217,6 +218,13 @@ public TimeSeriesParams.MetricType getMetricType() { return null; } + /** + * Returns the default highlighter type to use when highlighting the field. + */ + public String getDefaultHighlighter() { + return DefaultHighlighter.NAME; + } + /** Generates a query that will only match documents that contain the given value. 
* The default implementation returns a {@link TermQuery} over the value bytes * @throws IllegalArgumentException if {@code value} cannot be converted to the expected data type or if the field is not searchable diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index 6716c03a3a935..2183ce5646293 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -913,7 +913,7 @@ private static Map setupHighlighters(Settings settings, Lis NamedRegistry highlighters = new NamedRegistry<>("highlighter"); highlighters.register("fvh", new FastVectorHighlighter(settings)); highlighters.register("plain", new PlainHighlighter()); - highlighters.register("unified", new DefaultHighlighter()); + highlighters.register(DefaultHighlighter.NAME, new DefaultHighlighter()); highlighters.extractAndRegister(plugins, SearchPlugin::getHighlighters); return unmodifiableMap(highlighters.getRegistry()); diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index c47f815c18639..9f888c1f08baa 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -50,6 +50,8 @@ public class DefaultHighlighter implements Highlighter { + public static final String NAME = "unified"; + @Override public boolean canHighlight(MappedFieldType fieldType) { return true; diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java index c356c383d103a..54c265deb948d 100644 --- 
a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java @@ -66,7 +66,7 @@ public void process(HitContext hitContext) throws IOException { Map> contextBuilders = fieldContext.builders; for (String field : contextBuilders.keySet()) { FieldHighlightContext fieldContext = contextBuilders.get(field).apply(hitContext); - Highlighter highlighter = getHighlighter(fieldContext.field); + Highlighter highlighter = getHighlighter(fieldContext.field, fieldContext.fieldType); HighlightField highlightField = highlighter.highlight(fieldContext); if (highlightField != null) { // Note that we make sure to use the original field name in the response. This is because the @@ -80,10 +80,10 @@ public void process(HitContext hitContext) throws IOException { }; } - private Highlighter getHighlighter(SearchHighlightContext.Field field) { + private Highlighter getHighlighter(SearchHighlightContext.Field field, MappedFieldType fieldType) { String highlighterType = field.fieldOptions().highlighterType(); if (highlighterType == null) { - highlighterType = "unified"; + highlighterType = fieldType.getDefaultHighlighter(); } Highlighter highlighter = highlighters.get(highlighterType); if (highlighter == null) { @@ -103,8 +103,6 @@ private FieldContext contextBuilders( Map> builders = new LinkedHashMap<>(); StoredFieldsSpec storedFieldsSpec = StoredFieldsSpec.NO_REQUIREMENTS; for (SearchHighlightContext.Field field : highlightContext.fields()) { - Highlighter highlighter = getHighlighter(field); - Collection fieldNamesToHighlight = context.getSearchExecutionContext().getMatchingFieldNames(field.field()); boolean fieldNameContainsWildcards = field.field().contains("*"); @@ -112,6 +110,7 @@ private FieldContext contextBuilders( boolean sourceRequired = false; for (String fieldName : fieldNamesToHighlight) { MappedFieldType fieldType = 
context.getSearchExecutionContext().getFieldType(fieldName); + Highlighter highlighter = getHighlighter(field, fieldType); // We should prevent highlighting if a field is anything but a text, match_only_text, // or keyword field. diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 4707a7824fcd1..8c2be17777cca 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -25,6 +25,7 @@ public class InferenceFeatures implements FeatureSpecification { private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER = new NodeFeature("semantic_text.highlighter"); + private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT = new NodeFeature("semantic_text.highlighter.default"); @Override public Set getTestFeatures() { @@ -40,7 +41,8 @@ public Set getTestFeatures() { SemanticInferenceMetadataFieldsMapper.EXPLICIT_NULL_FIXES, SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED, TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX, - SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT + SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT, + SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 1acdff7a751ae..f24f407fd051d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ 
-73,6 +73,7 @@ import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder; +import org.elasticsearch.xpack.inference.highlight.SemanticTextHighlighter; import java.io.IOException; import java.io.UncheckedIOException; @@ -580,6 +581,11 @@ public String familyTypeName() { return TextFieldMapper.CONTENT_TYPE; } + @Override + public String getDefaultHighlighter() { + return SemanticTextHighlighter.NAME; + } + public String getInferenceId() { return inferenceId; } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml index ca87c97fc3acd..7765795ebfbdc 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml @@ -55,22 +55,32 @@ setup: index.mapping.semantic_text.use_legacy_format: false mappings: properties: + title: + type: text body: type: semantic_text inference_id: dense-inference-id ---- -"Highlighting using a sparse embedding model": - do: index: index: test-sparse-index id: doc_1 body: + title: "Elasticsearch" body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"] refresh: true - - match: { result: created } + - do: + index: + index: test-dense-index + id: doc_1 + body: + title: "Elasticsearch" + body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" 
] + refresh: true +--- +"Highlighting using a sparse embedding model": - do: search: index: test-sparse-index @@ -153,16 +163,6 @@ setup: --- "Highlighting using a dense embedding model": - - do: - index: - index: test-dense-index - id: doc_1 - body: - body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"] - refresh: true - - - match: { result: created } - - do: search: index: test-dense-index @@ -243,4 +243,51 @@ setup: - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } - match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } +--- +"Default highlighter for fields": + - requires: + cluster_features: "semantic_text.highlighter.default" + reason: semantic text field defaults to the semantic highlighter + + - do: + search: + index: test-dense-index + body: + query: + match: + body: "What is Elasticsearch?" + highlight: + fields: + body: + order: "score" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.body: 2 } + - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." 
} + +--- +"semantic highlighter ignores non-inference fields": + - requires: + cluster_features: "semantic_text.highlighter.default" + reason: semantic text field defaults to the semantic highlighter + + - do: + search: + index: test-dense-index + body: + query: + match: + title: "Elasticsearch" + highlight: + fields: + title: + type: semantic + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - not_exists: hits.hits.0.highlight.title