Skip to content

Commit 27cdf6b

Browse files
authored
Merge branch 'main' into fix_plugin_examples
2 parents d71ed35 + a626d9c commit 27cdf6b

File tree

5 files changed

+78
-24
lines changed

5 files changed

+78
-24
lines changed

docs/reference/elasticsearch/mapping-reference/semantic-text.md

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
navigation_title: "Semantic text"
33
mapped_pages:
44
- https://www.elastic.co/guide/en/elasticsearch/reference/current/semantic-text.html
5+
applies_to:
6+
stack: ga 9.0
7+
serverless: ga
58
---
69

710
# Semantic text field type [semantic-text]
@@ -29,7 +32,8 @@ service.
2932
Using `semantic_text`, you won’t need to specify how to generate embeddings for
3033
your data, or how to index it. The {{infer}} endpoint automatically determines
3134
the embedding generation, indexing, and query to use.
32-
Newly created indices with `semantic_text` fields using dense embeddings will be
35+
36+
{applies_to}`stack: ga 9.1` Newly created indices with `semantic_text` fields using dense embeddings will be
3337
[quantized](/reference/elasticsearch/mapping-reference/dense-vector.md#dense-vector-quantization)
3438
to `bbq_hnsw` automatically.
3539

@@ -182,6 +186,15 @@ For more details on chunking and how to configure chunking settings,
182186
see [Configuring chunking](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference)
183187
in the Inference API documentation.
184188

189+
Refer
190+
to [this tutorial](docs-content://solutions/search/semantic-search/semantic-search-semantic-text.md)
191+
to learn more about semantic search using `semantic_text`.
192+
193+
### Pre-chunking [pre-chunking]
194+
```{applies_to}
195+
stack: ga 9.1
196+
```
197+
185198
You can pre-chunk the input by sending it to Elasticsearch as an array of
186199
strings.
187200
Example:
@@ -228,10 +241,6 @@ PUT test-index/_doc/1
228241
* Others (such as `elastic` and `elasticsearch`) will automatically truncate
229242
the input.
230243

231-
Refer
232-
to [this tutorial](docs-content://solutions/search/semantic-search/semantic-search-semantic-text.md)
233-
to learn more about semantic search using `semantic_text`.
234-
235244
## Extracting relevant fragments from semantic text [semantic-text-highlighting]
236245

237246
You can extract the most relevant fragments from a semantic text field by using
@@ -295,6 +304,11 @@ specified. It enables you to quickstart your semantic search by providing
295304
automatic {{infer}} and a dedicated query so you don’t need to provide further
296305
details.
297306

307+
### Customizing using `semantic_text` parameters [custom-by-parameters]
308+
```{applies_to}
309+
stack: ga 9.1
310+
```
311+
298312
If you want to override those defaults and customize the embeddings that
299313
`semantic_text` indexes, you can do so by
300314
modifying [parameters](#semantic-text-params):
@@ -328,6 +342,24 @@ PUT my-index-000004
328342
}
329343
```
330344

345+
### Customizing using ingest pipelines [custom-by-pipelines]
346+
```{applies_to}
347+
stack: ga 9.0
348+
```
349+
350+
If you want to customize data indexing, use the
351+
[`sparse_vector`](/reference/elasticsearch/mapping-reference/sparse-vector.md)
352+
or [`dense_vector`](/reference/elasticsearch/mapping-reference/dense-vector.md)
353+
field types and create an ingest pipeline with an
354+
[{{infer}} processor](/reference/enrich-processor/inference-processor.md) to
355+
generate the embeddings.
356+
[This tutorial](docs-content://solutions/search/semantic-search/semantic-search-inference.md)
357+
walks you through the process. In these cases — when you use `sparse_vector` or
358+
`dense_vector` field types instead of the `semantic_text` field type to
359+
customize indexing — the
360+
[`semantic` query](/reference/query-languages/query-dsl/query-dsl-semantic-query.md)
361+
is not supported for querying the field data.
362+
331363
## Updates to `semantic_text` fields [update-script]
332364

333365
For indices containing `semantic_text` fields, updates that use scripts have the

docs/reference/query-languages/query-dsl/query-dsl-semantic-query.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
navigation_title: "Semantic"
33
mapped_pages:
44
- https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-semantic-query.html
5+
applies_to:
6+
stack: ga 9.0
7+
serverless: ga
58
---
69

710
# Semantic query [query-dsl-semantic-query]

muted-tests.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,12 @@ tests:
512512
- class: org.elasticsearch.xpack.stack.StackYamlIT
513513
method: test {yaml=stack/10_basic/Test wrong data_stream type - logs from 9.2.0}
514514
issue: https://github.com/elastic/elasticsearch/issues/131803
515+
- class: org.elasticsearch.packaging.test.DockerTests
516+
method: test151MachineDependentHeapWithSizeOverride
517+
issue: https://github.com/elastic/elasticsearch/issues/123437
518+
- class: org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapperTests
519+
method: testUpdates
520+
issue: https://github.com/elastic/elasticsearch/issues/131795
515521

516522
# Examples:
517523
#

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -548,11 +548,7 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throw
548548
// Doubles from doc values ensures that the values are in order
549549
try (BlockLoader.FloatBuilder builder = factory.denseVectors(docs.count() - offset, dimensions)) {
550550
for (int i = offset; i < docs.count(); i++) {
551-
int doc = docs.get(i);
552-
if (doc < iterator.docID()) {
553-
throw new IllegalStateException("docs within same block must be in order");
554-
}
555-
read(doc, builder);
551+
read(docs.get(i), builder);
556552
}
557553
return builder.build();
558554
}
@@ -564,7 +560,9 @@ public void read(int docId, BlockLoader.StoredFields storedFields, Builder build
564560
}
565561

566562
private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException {
567-
if (iterator.advance(doc) == doc) {
563+
if (iterator.docID() > doc) {
564+
builder.appendNull();
565+
} else if (iterator.docID() == doc || iterator.advance(doc) == doc) {
568566
builder.beginPositionEntry();
569567
float[] floats = floatVectorValues.vectorValue(iterator.index());
570568
assert floats.length == dimensions

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/DenseVectorFieldTypeIT.java

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,14 @@ public void testRetrieveTopNDenseVectorFieldData() {
9494
var values = valuesList.get(id);
9595
assertEquals(id, values.get(0));
9696
List<Float> vectors = (List<Float>) values.get(1);
97-
assertNotNull(vectors);
98-
assertEquals(vector.size(), vectors.size());
99-
for (int i = 0; i < vector.size(); i++) {
100-
assertEquals(vector.get(i), vectors.get(i), 0F);
97+
if (vector == null) {
98+
assertNull(vectors);
99+
} else {
100+
assertNotNull(vectors);
101+
assertEquals(vector.size(), vectors.size());
102+
for (int i = 0; i < vector.size(); i++) {
103+
assertEquals(vector.get(i), vectors.get(i), 0F);
104+
}
101105
}
102106
});
103107
}
@@ -117,12 +121,18 @@ public void testRetrieveDenseVectorFieldData() {
117121
;
118122
assertEquals(2, value.size());
119123
Integer id = (Integer) value.get(0);
120-
List<Float> vector = (List<Float>) value.get(1);
121-
assertNotNull(vector);
122124
List<Float> expectedVector = indexedVectors.get(id);
123-
assertNotNull(expectedVector);
124-
for (int i = 0; i < vector.size(); i++) {
125-
assertEquals(expectedVector.get(i), vector.get(i), 0F);
125+
List<Float> vector = (List<Float>) value.get(1);
126+
if (expectedVector == null) {
127+
assertNull(vector);
128+
} else {
129+
assertNotNull(vector);
130+
assertEquals(expectedVector.size(), vector.size());
131+
assertNotNull(vector);
132+
assertNotNull(expectedVector);
133+
for (int i = 0; i < vector.size(); i++) {
134+
assertEquals(expectedVector.get(i), vector.get(i), 0F);
135+
}
126136
}
127137
});
128138
}
@@ -168,11 +178,16 @@ public void setup() throws IOException {
168178
IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs];
169179
for (int i = 0; i < numDocs; i++) {
170180
List<Float> vector = new ArrayList<>(numDims);
171-
for (int j = 0; j < numDims; j++) {
172-
vector.add(randomFloat());
181+
if (rarely()) {
182+
docs[i] = prepareIndex("test").setId("" + i).setSource("id", String.valueOf(i));
183+
indexedVectors.put(i, null);
184+
} else {
185+
for (int j = 0; j < numDims; j++) {
186+
vector.add(randomFloat());
187+
}
188+
docs[i] = prepareIndex("test").setId("" + i).setSource("id", String.valueOf(i), "vector", vector);
189+
indexedVectors.put(i, vector);
173190
}
174-
docs[i] = prepareIndex("test").setId("" + i).setSource("id", String.valueOf(i), "vector", vector);
175-
indexedVectors.put(i, vector);
176191
}
177192

178193
indexRandom(true, docs);

0 commit comments

Comments
 (0)