diff --git a/docs/changelog/131525.yaml b/docs/changelog/131525.yaml new file mode 100644 index 0000000000000..233c4ff643643 --- /dev/null +++ b/docs/changelog/131525.yaml @@ -0,0 +1,6 @@ +pr: 131525 +summary: Fix semantic highlighting bug on flat quantized fields +area: Highlighting +type: bug +issues: + - 131443 diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index fe31ae71ba8c1..00f40e903d1ff 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -42,6 +42,7 @@ public class InferenceFeatures implements FeatureSpecification { ); private static final NodeFeature SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER = new NodeFeature("semantic_text.match_all_highlighter"); private static final NodeFeature COHERE_V2_API = new NodeFeature("inference.cohere.v2"); + public static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTING_FLAT = new NodeFeature("semantic_text.highlighter.flat_index_options"); @Override public Set getTestFeatures() { @@ -72,7 +73,8 @@ public Set getTestFeatures() { SEMANTIC_TEXT_INDEX_OPTIONS, COHERE_V2_API, SEMANTIC_TEXT_INDEX_OPTIONS_WITH_DEFAULTS, - SEMANTIC_QUERY_REWRITE_INTERCEPTORS_PROPAGATE_BOOST_AND_QUERY_NAME_FIX + SEMANTIC_QUERY_REWRITE_INTERCEPTORS_PROPAGATE_BOOST_AND_QUERY_NAME_FIX, + SEMANTIC_TEXT_HIGHLIGHTING_FLAT ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java index 92333a10c4d08..8e55cc9c222b5 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java @@ -32,6 +32,7 @@ import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; +import org.elasticsearch.search.vectors.DenseVectorQuery; import org.elasticsearch.search.vectors.SparseVectorQueryWrapper; import org.elasticsearch.search.vectors.VectorData; import org.elasticsearch.xcontent.Text; @@ -273,6 +274,8 @@ public void visitLeaf(Query query) { queries.add(fieldType.createExactKnnQuery(VectorData.fromBytes(knnQuery.getTargetCopy()), null)); } else if (query instanceof MatchAllDocsQuery) { queries.add(new MatchAllDocsQuery()); + } else if (query instanceof DenseVectorQuery.Floats floatsQuery) { + queries.add(fieldType.createExactKnnQuery(VectorData.fromFloats(floatsQuery.getQuery()), null)); } } }); diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml index 021dfe320d78e..60dea800ca624 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml @@ -35,6 +35,23 @@ setup: } } + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id-compatible-with-bbq + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 64, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } + - do: indices.create: index: test-sparse-index @@ -70,7 +87,7 @@ setup: id: doc_1 body: title: "Elasticsearch" - body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"] + body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] refresh: true - do: @@ -89,14 +106,14 @@ setup: index: test-dense-index body: query: - match_all: {} + match_all: { } highlight: fields: - another_body: {} + another_body: { } - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } - - not_exists: hits.hits.0.highlight.another_body + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - not_exists: hits.hits.0.highlight.another_body --- "Highlighting using a sparse embedding model": @@ -114,10 +131,10 @@ setup: type: "semantic" number_of_fragments: 1 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 1 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - do: search: @@ -133,11 +150,11 @@ setup: type: "semantic" number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 2 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } - do: search: @@ -154,10 +171,10 @@ setup: order: "score" number_of_fragments: 1 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 1 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - do: search: @@ -196,10 +213,10 @@ setup: type: "semantic" number_of_fragments: 1 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 1 } - - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } - do: search: @@ -215,11 +232,11 @@ setup: type: "semantic" number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 2 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } - do: search: @@ -236,10 +253,10 @@ setup: order: "score" number_of_fragments: 1 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 1 } - - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } - do: search: @@ -256,17 +273,17 @@ setup: order: "score" number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 2 } - - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } - - match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } --- "Default highlighter for fields": - requires: - cluster_features: "semantic_text.highlighter.default" - reason: semantic text field defaults to the semantic highlighter + cluster_features: "semantic_text.highlighter.default" + reason: semantic text field defaults to the semantic highlighter - do: search: @@ -281,11 +298,11 @@ setup: order: "score" number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 2 } - - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } - - match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } --- "semantic highlighter ignores non-inference fields": @@ -306,8 +323,8 @@ setup: type: semantic number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - not_exists: hits.hits.0.highlight.title --- @@ -333,7 +350,7 @@ setup: index: test-multi-chunk-index id: doc_1 body: - semantic_text_field: ["some test data", " ", "now with chunks"] + semantic_text_field: [ "some test data", " ", "now with chunks" ] refresh: true - do: @@ -367,25 +384,25 @@ setup: index: test-sparse-index body: query: - match_all: {} + match_all: { } highlight: fields: body: type: "semantic" number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 2 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } - do: search: index: test-dense-index body: query: - match_all: {} + match_all: { } highlight: fields: body: @@ -432,18 +449,18 @@ setup: index: test-index-sparse body: query: - match_all: {} + match_all: { } highlight: fields: semantic_text_field: type: "semantic" number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.semantic_text_field: 2 } - - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } - - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } + - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } + - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } - do: indices.create: @@ -473,7 +490,7 @@ setup: index: test-index-dense body: query: - match_all: {} + match_all: { } highlight: fields: semantic_text_field: @@ -485,3 +502,172 @@ setup: - length: { hits.hits.0.highlight.semantic_text_field: 2 } - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } + +--- +"Highlighting with flat quantization index options": + - requires: + cluster_features: "semantic_text.highlighter.flat_index_options" + reason: semantic highlighter fix for flat index options + + - do: + indices.create: + index: test-dense-index-flat + body: + settings: + index.mapping.semantic_text.use_legacy_format: false + mappings: + properties: + flat_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: flat + int4_flat_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int4_flat + int8_flat_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_flat + bbq_flat_field: + type: semantic_text + inference_id: dense-inference-id-compatible-with-bbq + index_options: + dense_vector: + type: bbq_flat + + + - do: + index: + index: test-dense-index-flat + id: doc_1 + body: + flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + int4_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + int8_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + bbq_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + refresh: true + + - do: + search: + index: test-dense-index-flat + body: + query: + match_all: { } + highlight: + fields: + flat_field: + type: "semantic" + number_of_fragments: 1 + int4_flat_field: + type: "semantic" + number_of_fragments: 1 + int8_flat_field: + type: "semantic" + number_of_fragments: 1 + bbq_flat_field: + type: "semantic" + number_of_fragments: 1 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight: 4 } + - length: { hits.hits.0.highlight.flat_field: 1 } + - match: { hits.hits.0.highlight.flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.int4_flat_field: 1 } + - match: { hits.hits.0.highlight.int4_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.int8_flat_field: 1 } + - match: { hits.hits.0.highlight.int8_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.bbq_flat_field: 1 } + - match: { hits.hits.0.highlight.bbq_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + +--- +"Highlighting with HNSW quantization index options": + - requires: + cluster_features: "semantic_text.highlighter.flat_index_options" + reason: semantic highlighter fix for flat index options + + - do: + indices.create: + index: test-dense-index-hnsw + body: + settings: + index.mapping.semantic_text.use_legacy_format: false + mappings: + properties: + hnsw_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: hnsw + int4_hnsw_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int4_hnsw + int8_hnsw_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_hnsw + bbq_hnsw_field: + type: semantic_text + inference_id: dense-inference-id-compatible-with-bbq + index_options: + dense_vector: + type: bbq_hnsw + + + - do: + index: + index: test-dense-index-hnsw + id: doc_1 + body: + hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + int4_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + int8_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + bbq_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + refresh: true + + - do: + search: + index: test-dense-index-hnsw + body: + query: + match_all: { } + highlight: + fields: + hnsw_field: + type: "semantic" + number_of_fragments: 1 + int4_hnsw_field: + type: "semantic" + number_of_fragments: 1 + int8_hnsw_field: + type: "semantic" + number_of_fragments: 1 + bbq_hnsw_field: + type: "semantic" + number_of_fragments: 1 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight: 4 } + - length: { hits.hits.0.highlight.hnsw_field: 1 } + - match: { hits.hits.0.highlight.hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.int4_hnsw_field: 1 } + - match: { hits.hits.0.highlight.int4_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.int8_hnsw_field: 1 } + - match: { hits.hits.0.highlight.int8_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.bbq_hnsw_field: 1 } + - match: { hits.hits.0.highlight.bbq_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml index 1e874d60a016c..4675977842973 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml @@ -35,6 +35,23 @@ setup: } } + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id-compatible-with-bbq + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 64, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } + - do: indices.create: index: test-sparse-index @@ -65,12 +82,12 @@ setup: --- "Highlighting empty field": - do: - index: - index: test-dense-index - id: doc_1 - body: - body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] - refresh: true + index: + index: test-dense-index + id: doc_1 + body: + body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + refresh: true - match: { result: created } @@ -79,14 +96,14 @@ setup: index: test-dense-index body: query: - match_all: {} + match_all: { } highlight: fields: - another_body: {} + another_body: { } - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } - - not_exists: hits.hits.0.highlight.another_body + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - not_exists: hits.hits.0.highlight.another_body --- "Highlighting using a sparse embedding model": @@ -95,7 +112,7 @@ setup: index: test-sparse-index id: doc_1 body: - body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"] + body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] refresh: true - match: { result: created } @@ -114,10 +131,10 @@ setup: type: "semantic" number_of_fragments: 1 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 1 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - do: search: @@ -133,11 +150,11 @@ setup: type: "semantic" number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 2 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } - do: search: @@ -154,10 +171,10 @@ setup: order: "score" number_of_fragments: 1 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 1 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - do: search: @@ -187,7 +204,7 @@ setup: index: test-dense-index id: doc_1 body: - body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"] + body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] refresh: true - match: { result: created } @@ -206,10 +223,10 @@ setup: type: "semantic" number_of_fragments: 1 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 1 } - - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } - do: search: @@ -225,11 +242,11 @@ setup: type: "semantic" number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 2 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } - do: search: @@ -246,10 +263,10 @@ setup: order: "score" number_of_fragments: 1 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 1 } - - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } - do: search: @@ -266,11 +283,11 @@ setup: order: "score" number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 2 } - - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } - - match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.0: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } --- "Highlighting and multi chunks with empty input": @@ -295,7 +312,7 @@ setup: index: test-multi-chunk-index id: doc_1 body: - semantic_text_field: ["some test data", " ", "now with chunks"] + semantic_text_field: [ "some test data", " ", "now with chunks" ] refresh: true - do: @@ -337,18 +354,18 @@ setup: index: test-sparse-index body: query: - match_all: {} + match_all: { } highlight: fields: body: type: "semantic" number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.body: 2 } - - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } - - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } + - match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - match: { hits.hits.0.highlight.body.1: "You Know, for Search!" } - do: index: @@ -363,7 +380,7 @@ setup: index: test-dense-index body: query: - match_all: {} + match_all: { } highlight: fields: body: @@ -410,18 +427,18 @@ setup: index: test-index-sparse body: query: - match_all: {} + match_all: { } highlight: fields: semantic_text_field: type: "semantic" number_of_fragments: 2 - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "doc_1" } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0.highlight.semantic_text_field: 2 } - - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } - - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } + - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } + - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } - do: indices.create: @@ -451,7 +468,7 @@ setup: index: test-index-dense body: query: - match_all: {} + match_all: { } highlight: fields: semantic_text_field: @@ -464,3 +481,173 @@ setup: - match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" } - match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" } +--- +"Highlighting with flat quantization index options": + - requires: + cluster_features: "semantic_text.highlighter.flat_index_options" + reason: semantic highlighter fix for flat index options + + - do: + indices.create: + index: test-dense-index-flat + body: + settings: + index.mapping.semantic_text.use_legacy_format: true + mappings: + properties: + flat_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: flat + int4_flat_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int4_flat + int8_flat_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_flat + bbq_flat_field: + type: semantic_text + inference_id: dense-inference-id-compatible-with-bbq + index_options: + dense_vector: + type: bbq_flat + + + - do: + index: + index: test-dense-index-flat + id: doc_1 + body: + flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + int4_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + int8_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + bbq_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + refresh: true + + - do: + search: + index: test-dense-index-flat + body: + query: + match_all: { } + highlight: + fields: + flat_field: + type: "semantic" + number_of_fragments: 1 + int4_flat_field: + type: "semantic" + number_of_fragments: 1 + int8_flat_field: + type: "semantic" + number_of_fragments: 1 + bbq_flat_field: + type: "semantic" + number_of_fragments: 1 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight: 4 } + - length: { hits.hits.0.highlight.flat_field: 1 } + - match: { hits.hits.0.highlight.flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.int4_flat_field: 1 } + - match: { hits.hits.0.highlight.int4_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.int8_flat_field: 1 } + - match: { hits.hits.0.highlight.int8_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.bbq_flat_field: 1 } + - match: { hits.hits.0.highlight.bbq_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + +--- +"Highlighting with HNSW quantization index options": + - requires: + cluster_features: "semantic_text.highlighter.flat_index_options" + reason: semantic highlighter fix for flat index options + + - do: + indices.create: + index: test-dense-index-hnsw + body: + settings: + index.mapping.semantic_text.use_legacy_format: true + mappings: + properties: + hnsw_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: hnsw + int4_hnsw_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int4_hnsw + int8_hnsw_field: + type: semantic_text + inference_id: dense-inference-id + index_options: + dense_vector: + type: int8_hnsw + bbq_hnsw_field: + type: semantic_text + inference_id: dense-inference-id-compatible-with-bbq + index_options: + dense_vector: + type: bbq_hnsw + + + - do: + index: + index: test-dense-index-hnsw + id: doc_1 + body: + hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + int4_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + int8_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + bbq_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ] + refresh: true + + - do: + search: + index: test-dense-index-hnsw + body: + query: + match_all: { } + highlight: + fields: + hnsw_field: + type: "semantic" + number_of_fragments: 1 + int4_hnsw_field: + type: "semantic" + number_of_fragments: 1 + int8_hnsw_field: + type: "semantic" + number_of_fragments: 1 + bbq_hnsw_field: + type: "semantic" + number_of_fragments: 1 + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - length: { hits.hits.0.highlight: 4 } + - length: { hits.hits.0.highlight.hnsw_field: 1 } + - match: { hits.hits.0.highlight.hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.int4_hnsw_field: 1 } + - match: { hits.hits.0.highlight.int4_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.int8_hnsw_field: 1 } + - match: { hits.hits.0.highlight.int8_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + - length: { hits.hits.0.highlight.bbq_hnsw_field: 1 } + - match: { hits.hits.0.highlight.bbq_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." } + + +