From 7d8242001a486ef4881af3ba7b04d1d87f232fa9 Mon Sep 17 00:00:00 2001 From: Samiul Monir <150824886+Samiul-TheSoccerFan@users.noreply.github.com> Date: Wed, 4 Jun 2025 09:33:25 -0400 Subject: [PATCH] Semantic_text match_all with Highlighter (#128702) * initial implementation for match_all * reformat * [CI] Auto commit changes from spotless * Excluding matchAll interceptor * Adding matchAllDocs support for vector fields * [CI] Auto commit changes from spotless * Remove previous implementation * Adding yaml tests for match_all * fixed yaml tests * Update docs/changelog/128702.yaml * Update changelog * changelog - update summary * Fix wrong inference names for the yaml tests --------- Co-authored-by: elasticsearchmachine Co-authored-by: Elastic Machine (cherry picked from commit d1b5532dbfd784048f225e10d1cc1d7c31861880) # Conflicts: # x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java # x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml # x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml --- docs/changelog/128702.yaml | 5 ++ .../xpack/inference/InferenceFeatures.java | 4 +- .../highlight/SemanticTextHighlighter.java | 10 ++++ .../90_semantic_text_highlighter.yml | 41 ++++++++++++++ .../90_semantic_text_highlighter_bwc.yml | 56 +++++++++++++++++++ 5 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/128702.yaml diff --git a/docs/changelog/128702.yaml b/docs/changelog/128702.yaml new file mode 100644 index 0000000000000..3f74e56627695 --- /dev/null +++ b/docs/changelog/128702.yaml @@ -0,0 +1,5 @@ +pr: 128702 +summary: Fix missing highlighting in `match_all` queries for `semantic_text` fields +area: Search +type: bug +issues: [] diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index f840ebd9ed283..8c16cbae82d1f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -39,6 +39,7 @@ public Set<NodeFeature> getFeatures() { private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER = new NodeFeature("semantic_text.highlighter"); private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT = new NodeFeature("semantic_text.highlighter.default"); + private static final NodeFeature SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER = new NodeFeature("semantic_text.match_all_highlighter"); @Override public Set<NodeFeature> getTestFeatures() { @@ -57,7 +58,8 @@ public Set<NodeFeature> getTestFeatures() { TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX, SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT, SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT, - SEMANTIC_KNN_FILTER_FIX + SEMANTIC_KNN_FILTER_FIX, + SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java index ae8997dd8c1f0..4dff2723115ea 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java @@ -15,6 +15,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.KnnByteVectorQuery; import org.apache.lucene.search.KnnFloatVectorQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.ScoreMode; @@ 
-267,6 +268,8 @@ public void visitLeaf(Query query) { queries.add(fieldType.createExactKnnQuery(VectorData.fromFloats(knnQuery.getTargetCopy()), null)); } else if (query instanceof KnnByteVectorQuery knnQuery) { queries.add(fieldType.createExactKnnQuery(VectorData.fromBytes(knnQuery.getTargetCopy()), null)); + } else if (query instanceof MatchAllDocsQuery) { + queries.add(new MatchAllDocsQuery()); } } }); @@ -293,6 +296,13 @@ public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) { } return this; } + + @Override + public void visitLeaf(Query query) { + if (query instanceof MatchAllDocsQuery) { + queries.add(new MatchAllDocsQuery()); + } + } }); return queries; } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml index 7765795ebfbdc..74729d96b379a 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml @@ -291,3 +291,44 @@ setup: - match: { hits.hits.0._id: "doc_1" } - not_exists: hits.hits.0.highlight.title +--- +"Highlighting with match_all query": + - requires: + cluster_features: "semantic_text.match_all_highlighter" + reason: semantic text field supports match_all query with semantic highlighter. + + - do: + search: + index: test-sparse-index + body: + query: + match_all: {} + highlight: + fields: + body: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.body: 2 } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." 
} + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + + - do: + search: + index: test-dense-index + body: + query: + match_all: {} + highlight: + fields: + body: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.body: 2 } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml index 3b2c8ba4f8082..3c339fafcecb7 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml @@ -243,4 +243,60 @@ setup: - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } - match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } +--- +"Highlighting with match_all query": + - requires: + cluster_features: "semantic_text.match_all_highlighter" + reason: semantic text field supports match_all query with semantic highlighter. + - do: + index: + index: test-sparse-index + id: doc_1 + body: + body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" 
] + refresh: true + + - do: + search: + index: test-sparse-index + body: + query: + match_all: {} + highlight: + fields: + body: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.body: 2 } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + + - do: + index: + index: test-dense-index + id: doc_1 + body: + body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + refresh: true + + - do: + search: + index: test-dense-index + body: + query: + match_all: {} + highlight: + fields: + body: + type: "semantic" + number_of_fragments: 2 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight.body: 2 } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }