From 3ccbe89136b3dba76b2630aaf7b7f442c18d5c1b Mon Sep 17 00:00:00 2001 From: Michail Romaios Date: Wed, 26 Nov 2025 13:13:15 +0100 Subject: [PATCH 1/3] chore(SemanticTextHighlighter): improve bwc tests --- .../90_semantic_text_highlighter.yml | 115 +++++++++++------- .../90_semantic_text_highlighter_bwc.yml | 115 +++++++++++------- 2 files changed, 146 insertions(+), 84 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml index 4683ac579fba8..22d3a7c3f5782 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml @@ -745,56 +745,33 @@ setup: - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } --- -"Highlighting with type:bbq_disk index options as well as knn with similarity": +"Highlighting with knn with similarity": - requires: - cluster_features: "semantic_text.highlighter.bbq_and_similarity_support" - reason: semantic highlighter fix for disk_bbq index options and knn with similarity - - - do: - indices.create: - index: test-dense-index-bbq_disk - body: - settings: - index.mapping.semantic_text.use_legacy_format: false - mappings: - properties: - bbq_disk_body: - type: semantic_text - inference_id: dense-inference-id - index_options: - dense_vector: - type: bbq_disk - body: - type: semantic_text - inference_id: dense-inference-id + cluster_features: "semantic_text.highlighter.vector_similarity_support" + reason: semantic highlighter fix for knn with similarity - do: index: - index: test-dense-index-bbq_disk + index: test-dense-index id: doc_1 body: - bbq_disk_body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!", "For a moment, nothing happened. Then, after a second or so, nothing continued to happen." ] body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!", "For a moment, nothing happened. Then, after a second or so, nothing continued to happen." ] - do: index: - index: test-dense-index-bbq_disk + index: test-dense-index id: doc_2 body: - bbq_disk_body: [ "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws."] body: [ "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws."] refresh: true - do: search: - index: test-dense-index-bbq_disk + index: test-dense-index body: query: match_all: { } highlight: fields: - bbq_disk_body: - type: "semantic" - number_of_fragments: 1 body: type: "semantic" number_of_fragments: 1 @@ -802,24 +779,18 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - - length: { hits.hits.0.highlight: 2 } - - length: { hits.hits.0.highlight.bbq_disk_body: 1 } - - match: { hits.hits.0.highlight.bbq_disk_body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight: 1 } - length: { hits.hits.0.highlight.body: 1 } - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - match: { hits.hits.1._id: "doc_2" } - - length: { hits.hits.1.highlight: 2 } - - length: { hits.hits.1.highlight.bbq_disk_body: 1 } - - match: { hits.hits.1.highlight.bbq_disk_body.0: "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws." } + - length: { hits.hits.1.highlight: 1 } - length: { hits.hits.1.highlight.body: 1 } - match: { hits.hits.1.highlight.body.0: "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws." } - - - do: search: - index: test-dense-index-bbq_disk + index: test-dense-index body: query: knn: @@ -843,13 +814,72 @@ setup: - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } - match: { hits.hits.0.highlight.body.2: "For a moment, nothing happened. Then, after a second or so, nothing continued to happen."} +--- +"Highlighting with type:bbq_disk index options": + - requires: + cluster_features: "semantic_text.highlighter.bbq_and_similarity_support" + reason: semantic highlighter fix for disk_bbq index options and knn with similarity + + - do: + indices.create: + index: test-dense-index-bbq_disk + body: + settings: + index.mapping.semantic_text.use_legacy_format: false + mappings: + properties: + bbq_disk_body: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: bbq_disk + + - do: + index: + index: test-dense-index-bbq_disk + id: doc_1 + body: + bbq_disk_body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!", "For a moment, nothing happened. Then, after a second or so, nothing continued to happen." ] + - do: + index: + index: test-dense-index-bbq_disk + id: doc_2 + body: + bbq_disk_body: [ "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws."] + refresh: true + + - do: + search: + index: test-dense-index-bbq_disk + body: + query: + match_all: { } + highlight: + fields: + bbq_disk_body: + type: "semantic" + number_of_fragments: 1 + + - match: { hits.total.value: 2 } + + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight: 1 } + - length: { hits.hits.0.highlight.bbq_disk_body: 1 } + - match: { hits.hits.0.highlight.bbq_disk_body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + + - match: { hits.hits.1._id: "doc_2" } + - length: { hits.hits.1.highlight: 1 } + - length: { hits.hits.1.highlight.bbq_disk_body: 1 } + - match: { hits.hits.1.highlight.bbq_disk_body.0: "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws." } + - do: search: index: test-dense-index-bbq_disk body: query: knn: - field: "bbq_disk_body" + field: "body" query_vector_builder: text_embedding: model_text: "What is Elasticsearch?" @@ -860,13 +890,14 @@ setup: fields: body: type: "semantic" - number_of_fragments: 2 + number_of_fragments: 3 - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - length: { hits.hits.0.highlight.body: 2 } + - length: { hits.hits.0.highlight.body: 3 } - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - - match: { hits.hits.0.highlight.body.1: "For a moment, nothing happened. Then, after a second or so, nothing continued to happen."} + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.2: "For a moment, nothing happened. Then, after a second or so, nothing continued to happen."} - do: search: diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml index f084bb52a06ea..6f5052af67c87 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml @@ -650,56 +650,33 @@ setup: - match: { hits.hits.0.highlight.bbq_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } --- -"Highlighting with type:bbq_disk index options as well as knn with similarity": +"Highlighting with knn with similarity": - requires: - cluster_features: "semantic_text.highlighter.bbq_and_similarity_support" - reason: semantic highlighter fix for disk_bbq index options and knn with similarity - - - do: - indices.create: - index: test-dense-index-bbq_disk - body: - settings: - index.mapping.semantic_text.use_legacy_format: true - mappings: - properties: - bbq_disk_body: - type: semantic_text - inference_id: dense-inference-id - index_options: - dense_vector: - type: bbq_disk - body: - type: semantic_text - inference_id: dense-inference-id + cluster_features: "semantic_text.highlighter.vector_similarity_support" + reason: semantic highlighter fix for knn with similarity - do: index: - index: test-dense-index-bbq_disk + index: test-dense-index id: doc_1 body: - bbq_disk_body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!", "For a moment, nothing happened. Then, after a second or so, nothing continued to happen." ] body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!", "For a moment, nothing happened. Then, after a second or so, nothing continued to happen." ] - do: index: - index: test-dense-index-bbq_disk + index: test-dense-index id: doc_2 body: - bbq_disk_body: [ "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws."] body: [ "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws."] refresh: true - do: search: - index: test-dense-index-bbq_disk + index: test-dense-index body: query: match_all: { } highlight: fields: - bbq_disk_body: - type: "semantic" - number_of_fragments: 1 body: type: "semantic" number_of_fragments: 1 @@ -707,24 +684,18 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - - length: { hits.hits.0.highlight: 2 } - - length: { hits.hits.0.highlight.bbq_disk_body: 1 } - - match: { hits.hits.0.highlight.bbq_disk_body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight: 1 } - length: { hits.hits.0.highlight.body: 1 } - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - match: { hits.hits.1._id: "doc_2" } - - length: { hits.hits.1.highlight: 2 } - - length: { hits.hits.1.highlight.bbq_disk_body: 1 } - - match: { hits.hits.1.highlight.bbq_disk_body.0: "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws." } + - length: { hits.hits.1.highlight: 1 } - length: { hits.hits.1.highlight.body: 1 } - match: { hits.hits.1.highlight.body.0: "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws." } - - - do: search: - index: test-dense-index-bbq_disk + index: test-dense-index body: query: knn: @@ -748,13 +719,72 @@ setup: - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } - match: { hits.hits.0.highlight.body.2: "For a moment, nothing happened. Then, after a second or so, nothing continued to happen."} +--- +"Highlighting with type:bbq_disk index options": + - requires: + cluster_features: "semantic_text.highlighter.bbq_and_similarity_support" + reason: semantic highlighter fix for disk_bbq index options and knn with similarity + + - do: + indices.create: + index: test-dense-index-bbq_disk + body: + settings: + index.mapping.semantic_text.use_legacy_format: true + mappings: + properties: + bbq_disk_body: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: bbq_disk + + - do: + index: + index: test-dense-index-bbq_disk + id: doc_1 + body: + bbq_disk_body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!", "For a moment, nothing happened. Then, after a second or so, nothing continued to happen." ] + - do: + index: + index: test-dense-index-bbq_disk + id: doc_2 + body: + bbq_disk_body: [ "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws."] + refresh: true + + - do: + search: + index: test-dense-index-bbq_disk + body: + query: + match_all: { } + highlight: + fields: + bbq_disk_body: + type: "semantic" + number_of_fragments: 1 + + - match: { hits.total.value: 2 } + + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight: 1 } + - length: { hits.hits.0.highlight.bbq_disk_body: 1 } + - match: { hits.hits.0.highlight.bbq_disk_body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + + - match: { hits.hits.1._id: "doc_2" } + - length: { hits.hits.1.highlight: 1 } + - length: { hits.hits.1.highlight.bbq_disk_body: 1 } + - match: { hits.hits.1.highlight.bbq_disk_body.0: "Nothing travels faster than the speed of light with the possible exception of bad news, which obeys its own special laws." } + - do: search: index: test-dense-index-bbq_disk body: query: knn: - field: "bbq_disk_body" + field: "body" query_vector_builder: text_embedding: model_text: "What is Elasticsearch?" @@ -765,13 +795,14 @@ setup: fields: body: type: "semantic" - number_of_fragments: 2 + number_of_fragments: 3 - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - length: { hits.hits.0.highlight.body: 2 } + - length: { hits.hits.0.highlight.body: 3 } - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - - match: { hits.hits.0.highlight.body.1: "For a moment, nothing happened. Then, after a second or so, nothing continued to happen."} + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.2: "For a moment, nothing happened. Then, after a second or so, nothing continued to happen."} - do: search: From 72cea51599202a86cfd253e7a49c4c7a220b0e37 Mon Sep 17 00:00:00 2001 From: Michail Romaios Date: Wed, 26 Nov 2025 16:04:42 +0100 Subject: [PATCH 2/3] fix yaml tests --- .../inference/90_semantic_text_highlighter.yml | 6 +++--- .../90_semantic_text_highlighter_bwc.yml | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml index 22d3a7c3f5782..90b4243443781 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml @@ -830,7 +830,7 @@ setup: properties: bbq_disk_body: type: semantic_text - inference_id: dense-inference-id + inference_id: dense-inference-id-compatible-with-bbq index_options: dense_vector: type: bbq_disk @@ -879,7 +879,7 @@ setup: body: query: knn: - field: "body" + field: "bbq_disk_body" query_vector_builder: text_embedding: model_text: "What is Elasticsearch?" @@ -888,7 +888,7 @@ setup: similarity: 0.9977 highlight: fields: - body: + bbq_disk_body: type: "semantic" number_of_fragments: 3 diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml index 6f5052af67c87..65fdc011b16d0 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml @@ -735,7 +735,7 @@ setup: properties: bbq_disk_body: type: semantic_text - inference_id: dense-inference-id + inference_id: dense-inference-id-compatible-with-bbq index_options: dense_vector: type: bbq_disk @@ -784,25 +784,25 @@ setup: body: query: knn: - field: "body" + field: "bbq_disk_body" query_vector_builder: text_embedding: model_text: "What is Elasticsearch?" k: 10 num_candidates: 10 - similarity: 0.9977 + similarity: 0.9975 highlight: fields: - body: + bbq_disk_body: type: "semantic" number_of_fragments: 3 - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - length: { hits.hits.0.highlight.body: 3 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } - - match: { hits.hits.0.highlight.body.2: "For a moment, nothing happened. Then, after a second or so, nothing continued to happen."} + - length: { hits.hits.0.highlight.bbq_disk_body: 3 } + - match: { hits.hits.0.highlight.bbq_disk_body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.bbq_disk_body.1: "You Know, for Search!" } + - match: { hits.hits.0.highlight.bbq_disk_body.2: "For a moment, nothing happened. Then, after a second or so, nothing continued to happen."} - do: search: From 75c323b57f36c57a1698a058b613c26c158ee911 Mon Sep 17 00:00:00 2001 From: Michail Romaios Date: Wed, 26 Nov 2025 16:50:23 +0100 Subject: [PATCH 3/3] yaml test fix --- .../test/inference/90_semantic_text_highlighter.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml index 90b4243443781..5cce8979abdbc 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml @@ -885,7 +885,7 @@ setup: model_text: "What is Elasticsearch?" k: 10 num_candidates: 10 - similarity: 0.9977 + similarity: 0.9975 highlight: fields: bbq_disk_body: @@ -894,10 +894,10 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - length: { hits.hits.0.highlight.body: 3 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } - - match: { hits.hits.0.highlight.body.2: "For a moment, nothing happened. Then, after a second or so, nothing continued to happen."} + - length: { hits.hits.0.highlight.bbq_disk_body: 3 } + - match: { hits.hits.0.highlight.bbq_disk_body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.bbq_disk_body.1: "You Know, for Search!" } + - match: { hits.hits.0.highlight.bbq_disk_body.2: "For a moment, nothing happened. Then, after a second or so, nothing continued to happen."} - do: search: