Skip to content

Commit 188c92a

Browse files
committed
add yaml test and avoid loading embeddings when not needed
1 parent 096a7de commit 188c92a

File tree

2 files changed

+38
-5
lines changed

2 files changed

+38
-5
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1063,7 +1063,9 @@ public void setNextReader(LeafReaderContext context) {
10631063
if (childScorer != null) {
10641064
childScorer.iterator().nextDoc();
10651065
}
1066-
dvLoader = fieldLoader.docValuesLoader(context.reader(), null);
1066+
if (onlyTextChunks == false) {
1067+
dvLoader = fieldLoader.docValuesLoader(context.reader(), null);
1068+
}
10671069
var terms = context.reader().terms(getOffsetsFieldName(name()));
10681070
offsetsLoader = terms != null ? OffsetSourceField.loader(terms) : null;
10691071
} catch (IOException exc) {
@@ -1141,10 +1143,12 @@ private void iterateChildDocs(
11411143
CheckedConsumer<OffsetSourceFieldMapper.OffsetSource, IOException> action
11421144
) throws IOException {
11431145
while (it.docID() < doc) {
1144-
if (dvLoader == null || dvLoader.advanceToDoc(it.docID()) == false) {
1145-
throw new IllegalStateException(
1146-
"Cannot fetch values for field [" + name() + "], missing embeddings for doc [" + doc + "]"
1147-
);
1146+
if (onlyTextChunks == false) {
1147+
if (dvLoader == null || dvLoader.advanceToDoc(it.docID()) == false) {
1148+
throw new IllegalStateException(
1149+
"Cannot fetch values for field [" + name() + "], missing embeddings for doc [" + doc + "]"
1150+
);
1151+
}
11481152
}
11491153

11501154
var offset = offsetsLoader.advanceTo(it.docID());

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -715,3 +715,32 @@ setup:
715715
- match: { hits.hits.0.fields.semantic_text_field.0: "some test data" }
716716
- match: { hits.hits.0.fields.semantic_text_field.1: "now with chunks" }
717717
- match: { hits.hits.0.fields.semantic_text_field.2: "text field data" }
718+
719+
---
720+
"Highlighting with match_all in a highlight_query":
721+
- requires:
722+
cluster_features: "semantic_text.match_all_highlighter"
723+
reason: semantic text field supports match_all query with semantic highlighter, effective from 8.19 and 9.1.0.
724+
725+
- do:
726+
search:
727+
index: test-sparse-index
728+
body:
729+
query:
730+
ids: {
731+
values: ["doc_1"]
732+
}
733+
highlight:
734+
fields:
735+
body:
736+
type: "semantic"
737+
number_of_fragments: 2
738+
highlight_query: {
739+
match_all: {}
740+
}
741+
742+
- match: { hits.total.value: 1 }
743+
- match: { hits.hits.0._id: "doc_1" }
744+
- length: { hits.hits.0.highlight.body: 2 }
745+
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
746+
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }

0 commit comments

Comments
 (0)