diff --git a/docs/changelog/128702.yaml b/docs/changelog/128702.yaml new file mode 100644 index 0000000000000..3f74e56627695 --- /dev/null +++ b/docs/changelog/128702.yaml @@ -0,0 +1,5 @@ +pr: 128702 +summary: Fix missing highlighting in `match_all` queries for `semantic_text` fields +area: Search +type: bug +issues: [] diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index ae5fc602babb9..669e29ba7debf 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -34,6 +34,7 @@ public class InferenceFeatures implements FeatureSpecification { private static final NodeFeature TEST_RULE_RETRIEVER_WITH_INDICES_THAT_DONT_RETURN_RANK_DOCS = new NodeFeature( "test_rule_retriever.with_indices_that_dont_return_rank_docs" ); + private static final NodeFeature SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER = new NodeFeature("semantic_text.match_all_highlighter"); @Override public Set getTestFeatures() { @@ -57,7 +58,8 @@ public Set getTestFeatures() { SemanticTextFieldMapper.SEMANTIC_TEXT_BIT_VECTOR_SUPPORT, SemanticTextFieldMapper.SEMANTIC_TEXT_HANDLE_EMPTY_INPUT, TEST_RULE_RETRIEVER_WITH_INDICES_THAT_DONT_RETURN_RANK_DOCS, - SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG + SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG, + SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java index ae8997dd8c1f0..4dff2723115ea 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java @@ -15,6 +15,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.KnnByteVectorQuery; import org.apache.lucene.search.KnnFloatVectorQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.ScoreMode; @@ -267,6 +268,8 @@ public void visitLeaf(Query query) { queries.add(fieldType.createExactKnnQuery(VectorData.fromFloats(knnQuery.getTargetCopy()), null)); } else if (query instanceof KnnByteVectorQuery knnQuery) { queries.add(fieldType.createExactKnnQuery(VectorData.fromBytes(knnQuery.getTargetCopy()), null)); + } else if (query instanceof MatchAllDocsQuery) { + queries.add(new MatchAllDocsQuery()); } } }); @@ -293,6 +296,13 @@ public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) { } return this; } + + @Override + public void visitLeaf(Query query) { + if (query instanceof MatchAllDocsQuery) { + queries.add(new MatchAllDocsQuery()); + } + } }); return queries; } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml index f1a33c262f9d7..a25a1464a7226 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml @@ -336,3 +336,133 @@ setup: - length: { hits.hits.0.highlight.semantic_text_field: 2 } - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } + +--- +"Highlighting with match_all query": + - requires: + cluster_features: "semantic_text.match_all_highlighter" + reason: semantic text field supports match_all query with semantic highlighter, effective from 8.19 and 9.1.0. + + - do: + search: + index: test-sparse-index + body: + query: + match_all: {} + highlight: + fields: + body: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.body: 2 } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + + - do: + search: + index: test-dense-index + body: + query: + match_all: {} + highlight: + fields: + body: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.body: 2 } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + +--- +"Highlighting with match_all and multi chunks with empty input": + - requires: + cluster_features: "semantic_text.match_all_highlighter" + reason: semantic text field supports match_all query with semantic highlighter, effective from 8.19 and 9.1.0. + + - do: + indices.create: + index: test-index-sparse + body: + settings: + index.mapping.semantic_text.use_legacy_format: false + mappings: + properties: + semantic_text_field: + type: semantic_text + inference_id: sparse-inference-id + text_field: + type: text + + - do: + index: + index: test-index-sparse + id: doc_1 + body: + semantic_text_field: [ "some test data", " ", "now with chunks" ] + text_field: "some test data" + refresh: true + + - do: + search: + index: test-index-sparse + body: + query: + match_all: {} + highlight: + fields: + semantic_text_field: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.semantic_text_field: 2 } + - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } + - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } + + - do: + indices.create: + index: test-index-dense + body: + settings: + index.mapping.semantic_text.use_legacy_format: false + mappings: + properties: + semantic_text_field: + type: semantic_text + inference_id: dense-inference-id + text_field: + type: text + + - do: + index: + index: test-index-dense + id: doc_1 + body: + semantic_text_field: [ "some test data", " ", "now with chunks" ] + text_field: "some test data" + refresh: true + + - do: + search: + index: test-index-dense + body: + query: + match_all: {} + highlight: + fields: + semantic_text_field: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.semantic_text_field: 2 } + - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } + - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml index 52fbf4d2723ac..6fcb1bb5a31b3 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml @@ -288,3 +288,150 @@ setup: - length: { hits.hits.0.highlight.semantic_text_field: 2 } - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } + +--- +"Highlighting with match_all query": + - requires: + cluster_features: "semantic_text.match_all_highlighter" + reason: semantic text field supports match_all query with semantic highlighter, effective from 8.19 and 9.1.0. + + - do: + index: + index: test-sparse-index + id: doc_1 + body: + body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + refresh: true + + - do: + search: + index: test-sparse-index + body: + query: + match_all: {} + highlight: + fields: + body: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.body: 2 } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + + - do: + index: + index: test-dense-index + id: doc_1 + body: + body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + refresh: true + + - do: + search: + index: test-dense-index + body: + query: + match_all: {} + highlight: + fields: + body: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.body: 2 } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + +--- +"Highlighting with match_all and multi chunks with empty input": + - requires: + cluster_features: "semantic_text.match_all_highlighter" + reason: semantic text field supports match_all query with semantic highlighter, effective from 8.19 and 9.1.0. + + - do: + indices.create: + index: test-index-sparse + body: + settings: + index.mapping.semantic_text.use_legacy_format: true + mappings: + properties: + semantic_text_field: + type: semantic_text + inference_id: sparse-inference-id + text_field: + type: text + + - do: + index: + index: test-index-sparse + id: doc_1 + body: + semantic_text_field: [ "some test data", " ", "now with chunks" ] + text_field: "some test data" + refresh: true + + - do: + search: + index: test-index-sparse + body: + query: + match_all: {} + highlight: + fields: + semantic_text_field: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.semantic_text_field: 2 } + - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } + - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } + + - do: + indices.create: + index: test-index-dense + body: + settings: + index.mapping.semantic_text.use_legacy_format: true + mappings: + properties: + semantic_text_field: + type: semantic_text + inference_id: dense-inference-id + text_field: + type: text + + - do: + index: + index: test-index-dense + id: doc_1 + body: + semantic_text_field: [ "some test data", " ", "now with chunks" ] + text_field: "some test data" + refresh: true + + - do: + search: + index: test-index-dense + body: + query: + match_all: {} + highlight: + fields: + semantic_text_field: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.semantic_text_field: 2 } + - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } + - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } +