Skip to content

Commit 51d9aae

Browse files
committed
Add yaml test
1 parent 65acf8f commit 51d9aae

File tree

3 files changed

+146
-1
lines changed

3 files changed

+146
-1
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515

1616
import java.util.Set;
1717

18+
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG;
1819
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_FILTER_FIX;
1920
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED;
2021
import static org.elasticsearch.xpack.inference.queries.SemanticMatchQueryRewriteInterceptor.SEMANTIC_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED;
@@ -49,7 +50,8 @@ public Set<NodeFeature> getTestFeatures() {
4950
SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT,
5051
SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT,
5152
SEMANTIC_KNN_FILTER_FIX,
52-
TEST_RERANKING_SERVICE_PARSE_TEXT_AS_SCORE
53+
TEST_RERANKING_SERVICE_PARSE_TEXT_AS_SCORE,
54+
SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG
5355
);
5456
}
5557
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,7 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
121121
"semantic_text.always_emit_inference_id_fix"
122122
);
123123
public static final NodeFeature SEMANTIC_TEXT_SKIP_INFERENCE_FIELDS = new NodeFeature("semantic_text.skip_inference_fields");
124+
public static final NodeFeature SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG = new NodeFeature("semantic_text.support_chunking_config");
124125

125126
public static final String CONTENT_TYPE = "semantic_text";
126127
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
Lines changed: 142 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,142 @@
1+
setup:
2+
- requires:
3+
cluster_features: "semantic_text.support_chunking_config"
4+
reason: semantic_text chunking configuration added in 8.19
5+
6+
- do:
7+
inference.put:
8+
task_type: text_embedding
9+
inference_id: dense-inference-id
10+
body: >
11+
{
12+
"service": "text_embedding_test_service",
13+
"service_settings": {
14+
"model": "my_model",
15+
"dimensions": 10,
16+
"similarity": "cosine",
17+
"api_key": "abc64"
18+
},
19+
"task_settings": {
20+
}
21+
}
22+
23+
- do:
24+
inference.put:
25+
task_type: sparse_embedding
26+
inference_id: sparse-inference-id
27+
body: >
28+
{
29+
"service": "test_service",
30+
"service_settings": {
31+
"model": "my_model",
32+
"api_key": "abc64"
33+
},
34+
"task_settings": {
35+
}
36+
}
37+
38+
- do:
39+
indices.create:
40+
index: default-chunking
41+
body:
42+
mappings:
43+
properties:
44+
keyword_field:
45+
type: keyword
46+
inference_field:
47+
type: semantic_text
48+
inference_id: sparse-inference-id
49+
50+
- do:
51+
indices.create:
52+
index: custom-chunking
53+
body:
54+
mappings:
55+
properties:
56+
keyword_field:
57+
type: keyword
58+
inference_field:
59+
type: semantic_text
60+
inference_id: dense-inference-id
61+
chunking_settings:
62+
strategy: word
63+
max_chunk_size: 10
64+
overlap: 5
65+
66+
- do:
67+
index:
68+
index: default-chunking
69+
id: doc_1
70+
body:
71+
keyword_field: "default sentence chunking"
72+
inference_field: "Elasticsearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides."
73+
refresh: true
74+
75+
- do:
76+
index:
77+
index: custom-chunking
78+
id: doc_2
79+
body:
80+
keyword_field: "custom word chunking"
81+
inference_field: "Elasticsearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides."
82+
refresh: true
83+
84+
---
85+
"We return chunking configurations with mappings":
86+
87+
- do:
88+
indices.get_mapping:
89+
index: default-chunking
90+
91+
- is_false: default-chunking.mappings.properties.inference_field.chunking_settings
92+
93+
- do:
94+
indices.get_mapping:
95+
index: custom-chunking
96+
97+
- match: { "custom-chunking.mappings.properties.inference_field.chunking_settings.strategy": "word" }
98+
- match: { "custom-chunking.mappings.properties.inference_field.chunking_settings.max_chunk_size": 10 }
99+
- match: { "custom-chunking.mappings.properties.inference_field.chunking_settings.overlap": 5 }
100+
101+
---
102+
"We return different chunks based on configured chunking overrides or model defaults":
103+
104+
- do:
105+
search:
106+
index: default-chunking
107+
body:
108+
query:
109+
semantic:
110+
field: "inference_field"
111+
query: "What is Elasticsearch?"
112+
highlight:
113+
fields:
114+
inference_field:
115+
type: "semantic"
116+
number_of_fragments: 2
117+
118+
- match: { hits.total.value: 1 }
119+
- match: { hits.hits.0._id: "doc_1" }
120+
- length: { hits.hits.0.highlight.inference_field: 1 }
121+
- match: { hits.hits.0.highlight.inference_field.0: "Elasticsearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
122+
123+
- do:
124+
search:
125+
index: custom-chunking
126+
body:
127+
query:
128+
semantic:
129+
field: "inference_field"
130+
query: "What is Elasticsearch?"
131+
highlight:
132+
fields:
133+
inference_field:
134+
type: "semantic"
135+
number_of_fragments: 2
136+
137+
- match: { hits.total.value: 1 }
138+
- match: { hits.hits.0._id: "doc_2" }
139+
- length: { hits.hits.0.highlight.inference_field: 2 }
140+
- match: { hits.hits.0.highlight.inference_field.0: "Elasticsearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all" }
141+
- match: { hits.hits.0.highlight.inference_field.1: " Lucene internally and enjoys all the features it provides." }
142+

0 commit comments

Comments
 (0)