Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/129150.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 129150
summary: Add `none` chunking strategy to disable automatic chunking for inference
endpoints
area: Machine Learning
type: feature
issues: []
54 changes: 50 additions & 4 deletions docs/reference/elasticsearch/mapping-reference/semantic-text.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,15 +117,16 @@ If specified, these will override the chunking settings set in the {{infer-cap}}
endpoint associated with `inference_id`.
If chunking settings are updated, they will not be applied to existing documents
until they are reindexed.
To completely disable chunking, use the `none` chunking strategy.

**Valid values for `chunking_settings`**:

`type`
: Indicates the type of chunking strategy to use. Valid values are `word` or
: Indicates the type of chunking strategy to use. Valid values are `none`, `word`, or
`sentence`. Required.

`max_chunk_size`
: The maximum number of works in a chunk. Required.
: The maximum number of words in a chunk. Required for `word` and `sentence` strategies.

`overlap`
: The number of overlapping words allowed in chunks. This cannot be defined as
Expand All @@ -136,6 +137,12 @@ until they are reindexed.
: The number of overlapping sentences allowed in chunks. Valid values are `0`
or `1`. Required for `sentence` type chunking settings

::::{warning}
When using the `none` chunking strategy, if the input exceeds the maximum token limit of the underlying model,
some services (such as OpenAI) may return an error. In contrast, the `elastic` and `elasticsearch` services
will automatically truncate the input to fit within the model's limit.
::::

## {{infer-cap}} endpoint validation [infer-endpoint-validation]

The `inference_id` will not be validated when the mapping is created, but when
Expand Down Expand Up @@ -166,10 +173,49 @@ For more details on chunking and how to configure chunking settings,
see [Configuring chunking](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference)
in the Inference API documentation.

You can pre-chunk the input by sending it to Elasticsearch as an array of strings.
Example:

```console
PUT test-index
{
"mappings": {
"properties": {
"my_semantic_field": {
"type": "semantic_text",
"chunking_settings": {
"strategy": "none" <1>
}
}
}
}
}
```

1. Disable chunking on `my_semantic_field`.

```console
PUT test-index/_doc/1
{
"my_semantic_field": ["my first chunk", "my second chunk", ...] <1>
...
}
```

1. The text is pre-chunked and provided as an array of strings.
Each element in the array represents a single chunk that will be sent directly to the inference service without further chunking.

**Important considerations**:

* When providing pre-chunked input, ensure that you set the chunking strategy to `none` to avoid additional processing.
* Each chunk should be sized carefully, staying within the token limit of the inference service and the underlying model.
* If a chunk exceeds the model's token limit, the behavior depends on the service:
* Some services (such as OpenAI) will return an error.
* Others (such as `elastic` and `elasticsearch`) will automatically truncate the input.

Refer
to [this tutorial](docs-content://solutions/search/semantic-search/semantic-search-semantic-text.md)
to learn more about semantic search using `semantic_text` and the `semantic`
query.
to learn more about semantic search using `semantic_text`.

## Extracting Relevant Fragments from Semantic Text [semantic-text-highlighting]

Expand Down
2 changes: 2 additions & 0 deletions server/src/main/java/org/elasticsearch/TransportVersions.java
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ESQL_QUERY_PLANNING_DURATION_8_19 = def(8_841_0_45);
public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM_8_19 = def(8_841_0_46);
public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED_8_19 = def(8_841_0_47);
public static final TransportVersion NONE_CHUNKING_STRATEGY_8_19 = def(8_841_0_48);
public static final TransportVersion V_9_0_0 = def(9_000_0_09);
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11);
Expand Down Expand Up @@ -289,6 +290,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED = def(9_090_0_00);
public static final TransportVersion IDP_CUSTOM_SAML_ATTRIBUTES_ALLOW_LIST = def(9_091_0_00);
public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM = def(9_092_0_00);
public static final TransportVersion NONE_CHUNKING_STRATEGY = def(9_093_0_00);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@

public enum ChunkingStrategy {
WORD("word"),
SENTENCE("sentence");
SENTENCE("sentence"),
NONE("none");

private final String chunkingStrategy;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,9 @@ protected List<ChunkedInput> chunkInputs(ChunkInferenceInput input) {
}

List<ChunkedInput> chunkedInputs = new ArrayList<>();
if (chunkingSettings.getChunkingStrategy() == ChunkingStrategy.WORD) {
if (chunkingSettings.getChunkingStrategy() == ChunkingStrategy.NONE) {
return List.of(new ChunkedInput(inputText, 0, inputText.length()));
} else if (chunkingSettings.getChunkingStrategy() == ChunkingStrategy.WORD) {
WordBoundaryChunker chunker = new WordBoundaryChunker();
WordBoundaryChunkingSettings wordBoundaryChunkingSettings = (WordBoundaryChunkingSettings) chunkingSettings;
List<WordBoundaryChunker.ChunkOffset> offsets = chunker.chunk(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.elasticsearch.xpack.core.inference.results.TextEmbeddingByteResults;
import org.elasticsearch.xpack.core.inference.results.TextEmbeddingFloatResults;
import org.elasticsearch.xpack.inference.action.task.StreamingTaskManager;
import org.elasticsearch.xpack.inference.chunking.NoneChunkingSettings;
import org.elasticsearch.xpack.inference.chunking.SentenceBoundaryChunkingSettings;
import org.elasticsearch.xpack.inference.chunking.WordBoundaryChunkingSettings;
import org.elasticsearch.xpack.inference.common.amazon.AwsSecretSettings;
Expand Down Expand Up @@ -552,6 +553,9 @@ private static void addInternalNamedWriteables(List<NamedWriteableRegistry.Entry
}

private static void addChunkingSettingsNamedWriteables(List<NamedWriteableRegistry.Entry> namedWriteables) {
namedWriteables.add(
new NamedWriteableRegistry.Entry(ChunkingSettings.class, NoneChunkingSettings.NAME, in -> NoneChunkingSettings.INSTANCE)
);
namedWriteables.add(
new NamedWriteableRegistry.Entry(ChunkingSettings.class, WordBoundaryChunkingSettings.NAME, WordBoundaryChunkingSettings::new)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ public static Chunker fromChunkingStrategy(ChunkingStrategy chunkingStrategy) {
}

return switch (chunkingStrategy) {
case NONE -> NoopChunker.INSTANCE;
case WORD -> new WordBoundaryChunker();
case SENTENCE -> new SentenceBoundaryChunker();
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public static ChunkingSettings fromMap(Map<String, Object> settings, boolean ret
settings.get(ChunkingSettingsOptions.STRATEGY.toString()).toString()
);
return switch (chunkingStrategy) {
case NONE -> NoneChunkingSettings.INSTANCE;
case WORD -> WordBoundaryChunkingSettings.fromMap(new HashMap<>(settings));
case SENTENCE -> SentenceBoundaryChunkingSettings.fromMap(new HashMap<>(settings));
};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.chunking;

import org.elasticsearch.TransportVersion;
import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.ValidationException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.inference.ChunkingSettings;
import org.elasticsearch.inference.ChunkingStrategy;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * {@link ChunkingSettings} implementation for the {@code none} chunking strategy, which disables
 * automatic chunking entirely: the input is passed to the inference service as a single chunk.
 *
 * <p>The strategy carries no configuration beyond its name, so this class is a stateless
 * singleton; all instances compare equal.</p>
 */
public class NoneChunkingSettings implements ChunkingSettings {
    public static final String NAME = "NoneChunkingSettings";
    // final: the shared singleton must not be reassignable.
    public static final NoneChunkingSettings INSTANCE = new NoneChunkingSettings();

    private static final ChunkingStrategy STRATEGY = ChunkingStrategy.NONE;
    // Only the strategy key itself is permitted; every other key is rejected in fromMap.
    private static final Set<String> VALID_KEYS = Set.of(ChunkingSettingsOptions.STRATEGY.toString());

    private NoneChunkingSettings() {}

    // Kept for NamedWriteable symmetry; this strategy serializes no state, and the registry
    // entry deserializes via the INSTANCE singleton rather than this constructor.
    public NoneChunkingSettings(StreamInput in) throws IOException {}

    @Override
    public ChunkingStrategy getChunkingStrategy() {
        return STRATEGY;
    }

    @Override
    public String getWriteableName() {
        return NAME;
    }

    @Override
    public TransportVersion getMinimalSupportedVersion() {
        // supportsVersion is overridden below, so this method is never consulted.
        throw new IllegalStateException("not used");
    }

    @Override
    public boolean supportsVersion(TransportVersion version) {
        // Supported on 8.19 patch releases carrying the backport, and on 9.x from the
        // version that introduced the strategy onward.
        return version.isPatchFrom(TransportVersions.NONE_CHUNKING_STRATEGY_8_19)
            || version.onOrAfter(TransportVersions.NONE_CHUNKING_STRATEGY);
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        // No state to serialize.
    }

    @Override
    public Map<String, Object> asMap() {
        return Map.of(ChunkingSettingsOptions.STRATEGY.toString(), STRATEGY.toString().toLowerCase(Locale.ROOT));
    }

    /**
     * Builds the settings from a user-supplied map, rejecting any key other than the strategy itself.
     *
     * @param map the raw chunking settings map
     * @return the shared {@link #INSTANCE}
     * @throws ValidationException if the map contains settings not applicable to the {@code none} strategy
     */
    public static NoneChunkingSettings fromMap(Map<String, Object> map) {
        ValidationException validationException = new ValidationException();

        var invalidSettings = map.keySet().stream().filter(key -> VALID_KEYS.contains(key) == false).toArray();
        if (invalidSettings.length > 0) {
            // Note: previously this message said "Sentence based chunking settings" — a copy-paste error.
            validationException.addValidationError(
                Strings.format("None chunking settings can not have the following settings: %s", Arrays.toString(invalidSettings))
            );
        }

        if (validationException.validationErrors().isEmpty() == false) {
            throw validationException;
        }

        // Stateless, so the singleton can always be returned.
        return INSTANCE;
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        {
            builder.field(ChunkingSettingsOptions.STRATEGY.toString(), STRATEGY);
        }
        builder.endObject();
        return builder;
    }

    @Override
    public boolean equals(Object o) {
        // Stateless: any two instances of this class are equal.
        if (this == o) return true;
        return o != null && getClass() == o.getClass();
    }

    @Override
    public int hashCode() {
        return Objects.hash(getClass());
    }

    @Override
    public String toString() {
        return Strings.toString(this);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.chunking;

import org.elasticsearch.common.Strings;
import org.elasticsearch.inference.ChunkingSettings;
import org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsModel;

import java.util.List;

/**
 * A {@link Chunker} that performs no chunking: the entire input is returned as a single chunk
 * spanning the whole string.
 *
 * <p><b>WARNING:</b> if the input exceeds the maximum token limit of the underlying model, some
 * services (such as {@link OpenAiEmbeddingsModel}) may return an error.</p>
 */
public class NoopChunker implements Chunker {
    static final NoopChunker INSTANCE = new NoopChunker();

    private NoopChunker() {}

    @Override
    public List<ChunkOffset> chunk(String input, ChunkingSettings chunkingSettings) {
        // This chunker is only valid for the "none" strategy; reject any other settings up front.
        if ((chunkingSettings instanceof NoneChunkingSettings) == false) {
            throw new IllegalArgumentException(
                Strings.format("NoopChunker can't use ChunkingSettings with strategy [%s]", chunkingSettings.getChunkingStrategy())
            );
        }
        return List.of(new ChunkOffset(0, input.length()));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ public void testValidChunkingStrategy() {
}

private Map<ChunkingStrategy, Class<? extends Chunker>> chunkingStrategyToExpectedChunkerClassMap() {
return Map.of(ChunkingStrategy.WORD, WordBoundaryChunker.class, ChunkingStrategy.SENTENCE, SentenceBoundaryChunker.class);
return Map.of(
ChunkingStrategy.NONE,
NoopChunker.class,
ChunkingStrategy.WORD,
WordBoundaryChunker.class,
ChunkingStrategy.SENTENCE,
SentenceBoundaryChunker.class
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ public static ChunkingSettings createRandomChunkingSettings() {
ChunkingStrategy randomStrategy = randomFrom(ChunkingStrategy.values());

switch (randomStrategy) {
case NONE -> {
return NoneChunkingSettings.INSTANCE;
}
case WORD -> {
var maxChunkSize = randomIntBetween(10, 300);
return new WordBoundaryChunkingSettings(maxChunkSize, randomIntBetween(1, maxChunkSize / 2));
Expand All @@ -37,15 +40,15 @@ public static Map<String, Object> createRandomChunkingSettingsMap() {
chunkingSettingsMap.put(ChunkingSettingsOptions.STRATEGY.toString(), randomStrategy.toString());

switch (randomStrategy) {
case NONE -> {
}
case WORD -> {
var maxChunkSize = randomIntBetween(10, 300);
chunkingSettingsMap.put(ChunkingSettingsOptions.MAX_CHUNK_SIZE.toString(), maxChunkSize);
chunkingSettingsMap.put(ChunkingSettingsOptions.OVERLAP.toString(), randomIntBetween(1, maxChunkSize / 2));

}
case SENTENCE -> {
chunkingSettingsMap.put(ChunkingSettingsOptions.MAX_CHUNK_SIZE.toString(), randomIntBetween(20, 300));
}
case SENTENCE -> chunkingSettingsMap.put(ChunkingSettingsOptions.MAX_CHUNK_SIZE.toString(), randomIntBetween(20, 300));
default -> {
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,22 @@ public void testEmptyInput_SentenceChunker() {
assertThat(batches, empty());
}

public void testEmptyInput_NoopChunker() {
var batches = new EmbeddingRequestChunker<>(List.of(), 10, NoneChunkingSettings.INSTANCE).batchRequestsWithListeners(
testListener()
);
assertThat(batches, empty());
}

public void testAnyInput_NoopChunker() {
var randomInput = randomAlphaOfLengthBetween(100, 1000);
var batches = new EmbeddingRequestChunker<>(List.of(new ChunkInferenceInput(randomInput)), 10, NoneChunkingSettings.INSTANCE)
.batchRequestsWithListeners(testListener());
assertThat(batches, hasSize(1));
assertThat(batches.get(0).batch().inputs().get(), hasSize(1));
assertThat(batches.get(0).batch().inputs().get().get(0), Matchers.is(randomInput));
}

public void testWhitespaceInput_SentenceChunker() {
var batches = new EmbeddingRequestChunker<>(
List.of(new ChunkInferenceInput(" ")),
Expand Down
Loading
Loading