@@ -291,3 +291,108 @@ setup:
291291 - match : { hits.hits.0._id: "doc_1" }
292292 - not_exists : hits.hits.0.highlight.title
293293
294+ ---
295+ " Highlighting and multi chunks with empty input " :
296+ - requires :
297+ cluster_features : " semantic_text.handle_empty_input"
298+ reason : skips generating embeddings when a semantic_text field contains empty or whitespace-only input
299+
300+ - do :
301+ indices.create :
302+ index : test-multi-chunk-index
303+ body :
304+ settings :
305+ index.mapping.semantic_text.use_legacy_format : false
306+ mappings :
307+ properties :
308+ semantic_text_field :
309+ type : semantic_text
310+ inference_id : sparse-inference-id
311+
312+ - do :
313+ index :
314+ index : test-multi-chunk-index
315+ id : doc_1
316+ body :
317+ semantic_text_field : ["some test data", " ", "now with chunks"]
318+ refresh : true
319+
320+ - do :
321+ search :
322+ index : test-multi-chunk-index
323+ body :
324+ query :
325+ semantic :
326+ field : " semantic_text_field"
327+ query : " test"
328+ highlight :
329+ fields :
330+ semantic_text_field :
331+ type : " semantic"
332+ number_of_fragments : 1
333+
334+ - match : { hits.total.value: 1 }
335+ - match : { hits.hits.0._id: "doc_1" }
336+ - length : { hits.hits.0.highlight.semantic_text_field: 1 }
337+ - match : { hits.hits.0.highlight.semantic_text_field.0: "now with chunks" }
338+
339+ - do :
340+ search :
341+ index : test-multi-chunk-index
342+ body :
343+ query :
344+ semantic :
345+ field : " semantic_text_field"
346+ query : " test"
347+ highlight :
348+ fields :
349+ semantic_text_field :
350+ type : " semantic"
351+ number_of_fragments : 2
352+
353+ - match : { hits.total.value: 1 }
354+ - match : { hits.hits.0._id: "doc_1" }
355+ - length : { hits.hits.0.highlight.semantic_text_field: 2}
356+ - match : { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
357+ - match : { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
358+
359+ - do :
360+ search :
361+ index : test-multi-chunk-index
362+ body :
363+ query :
364+ semantic :
365+ field : " semantic_text_field"
366+ query : " test"
367+ highlight :
368+ fields :
369+ semantic_text_field :
370+ type : " semantic"
371+ order : " score"
372+ number_of_fragments : 1
373+
374+ - match : { hits.total.value: 1 }
375+ - match : { hits.hits.0._id: "doc_1" }
376+ - length : { hits.hits.0.highlight.semantic_text_field: 1 }
377+ - match : { hits.hits.0.highlight.semantic_text_field.0: "now with chunks" }
378+
379+ - do :
380+ search :
381+ index : test-multi-chunk-index
382+ body :
383+ query :
384+ semantic :
385+ field : " semantic_text_field"
386+ query : " test"
387+ highlight :
388+ fields :
389+ semantic_text_field :
390+ type : " semantic"
391+ order : " score"
392+ number_of_fragments : 2
393+
394+ - match : { hits.total.value: 1 }
395+ - match : { hits.hits.0._id: "doc_1" }
396+ - length : { hits.hits.0.highlight.semantic_text_field: 2}
397+ - match : { hits.hits.0.highlight.semantic_text_field.0: "now with chunks" }
398+ - match : { hits.hits.0.highlight.semantic_text_field.1: "some test data" }
0 commit comments