Skip to content

Commit 92d70dc

Browse files
committed
Add additional yaml test cases
1 parent 2ab5aec commit 92d70dc

File tree

1 file changed

+133
-0
lines changed

1 file changed

+133
-0
lines changed

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/25_semantic_text_field_mapping_chunking.yml

Lines changed: 133 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -241,3 +241,136 @@ setup:
241241
- length: { hits.hits.0.highlight.inference_field: 2 }
242242
- match: { hits.hits.0.highlight.inference_field.0: "Elasticsearch is an open source, distributed, RESTful, search engine which" }
243243
- match: { hits.hits.0.highlight.inference_field.1: " which is built on top of Lucene internally and enjoys" }
244+
245+
---
# Checks that two semantic_text fields living in the same index can carry
# different chunking configurations, and that semantic highlighting returns
# fragments that follow each field's own configuration.
"We respect multiple semantic_text fields with different chunking configurations":

  # Create an index with two semantic_text fields that use the same
  # inference_id. Only the second field declares chunking_settings
  # (strategy word, max_chunk_size 10, overlap 1).
  # NOTE(review): sparse-inference-id is presumably created in this file's
  # setup section, which is not visible here - confirm.
  - do:
      indices.create:
        index: mixed-chunking
        body:
          mappings:
            properties:
              keyword_field:
                type: keyword
              default_chunked_inference_field:
                type: semantic_text
                inference_id: sparse-inference-id
              customized_chunked_inference_field:
                type: semantic_text
                inference_id: sparse-inference-id
                chunking_settings:
                  strategy: word
                  max_chunk_size: 10
                  overlap: 1

  # Index the same sentence into both fields so that any difference in the
  # highlights below can only come from the chunking configuration.
  - do:
      index:
        index: mixed-chunking
        id: doc_1
        refresh: true
        body:
          default_chunked_inference_field: 'Elasticsearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.'
          customized_chunked_inference_field: 'Elasticsearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.'

  # Run a semantic query against each field and request semantic highlights
  # with number_of_fragments set to 2 for both fields.
  - do:
      search:
        index: mixed-chunking
        body:
          query:
            bool:
              should:
                - semantic:
                    field: default_chunked_inference_field
                    query: 'What is Elasticsearch?'
                - semantic:
                    field: customized_chunked_inference_field
                    query: 'What is Elasticsearch?'
          highlight:
            fields:
              default_chunked_inference_field:
                type: semantic
                number_of_fragments: 2
              customized_chunked_inference_field:
                type: semantic
                number_of_fragments: 2

  # Exactly one document matches.
  - match:
      hits.total.value: 1
  - match:
      hits.hits.0._id: doc_1

  # Field without chunking_settings: the full sentence is returned as a
  # single fragment.
  - length:
      hits.hits.0.highlight.default_chunked_inference_field: 1
  - match:
      hits.hits.0.highlight.default_chunked_inference_field.0: 'Elasticsearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.'

  # Field with word chunking: two ten-word fragments; the second starts with
  # the word the first one ended on ("which"), i.e. an overlap of one word.
  - length:
      hits.hits.0.highlight.customized_chunked_inference_field: 2
  - match:
      hits.hits.0.highlight.customized_chunked_inference_field.0: 'Elasticsearch is an open source, distributed, RESTful, search engine which'
  - match:
      hits.hits.0.highlight.customized_chunked_inference_field.1: ' which is built on top of Lucene internally and enjoys'
305+
306+
---
# Checks that a single bulk request writing to two indices, whose
# semantic_text fields are configured differently, yields highlights that
# follow the chunking configuration of the index each document landed in.
"Bulk requests are handled appropriately":

  # index1: inference_field declares word chunking
  # (max_chunk_size 10, overlap 1).
  - do:
      indices.create:
        index: index1
        body:
          mappings:
            properties:
              keyword_field:
                type: keyword
              inference_field:
                type: semantic_text
                inference_id: sparse-inference-id
                chunking_settings:
                  strategy: word
                  max_chunk_size: 10
                  overlap: 1

  # index2: same field name and inference_id, but no chunking_settings.
  - do:
      indices.create:
        index: index2
        body:
          mappings:
            properties:
              keyword_field:
                type: keyword
              inference_field:
                type: semantic_text
                inference_id: sparse-inference-id

  # One bulk request: doc_1 and doc_3 go to index1, doc_2 goes to index2.
  # doc_1 and doc_2 carry identical text. The body is newline-delimited JSON
  # and must stay a literal block so every action/source pair keeps its own
  # line.
  - do:
      bulk:
        refresh: true
        body: |
          { "index": { "_index": "index1", "_id": "doc_1" }}
          { "inference_field": "Elasticsearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
          { "index": { "_index": "index2", "_id": "doc_2" }}
          { "inference_field": "Elasticsearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
          { "index": { "_index": "index1", "_id": "doc_3" }}
          { "inference_field": "Elasticsearch is a free, open-source search engine and analytics tool that stores and indexes data." }

  # Search both indices at once with semantic highlighting on
  # inference_field.
  - do:
      search:
        index: index1,index2
        body:
          query:
            semantic:
              field: inference_field
              query: 'What is Elasticsearch?'
          highlight:
            fields:
              inference_field:
                type: semantic
                number_of_fragments: 2

  # All three documents are returned.
  - match:
      hits.total.value: 3

  # First hit: doc_3 (index1). Two fragments; the second starts with the
  # word the first one ended on ("analytics").
  - match:
      hits.hits.0._id: doc_3
  - length:
      hits.hits.0.highlight.inference_field: 2
  - match:
      hits.hits.0.highlight.inference_field.0: 'Elasticsearch is a free, open-source search engine and analytics'
  - match:
      hits.hits.0.highlight.inference_field.1: ' analytics tool that stores and indexes data.'

  # Second hit: doc_1 (index1). Two fragments overlapping on "which".
  - match:
      hits.hits.1._id: doc_1
  - length:
      hits.hits.1.highlight.inference_field: 2
  - match:
      hits.hits.1.highlight.inference_field.0: 'Elasticsearch is an open source, distributed, RESTful, search engine which'
  - match:
      hits.hits.1.highlight.inference_field.1: ' which is built on top of Lucene internally and enjoys'

  # Third hit: doc_2 (index2, no chunking_settings). The whole sentence is
  # returned as a single fragment.
  - match:
      hits.hits.2._id: doc_2
  - length:
      hits.hits.2.highlight.inference_field: 1
  - match:
      hits.hits.2.highlight.inference_field.0: 'Elasticsearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.'

0 commit comments

Comments
 (0)