Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
f054dca
Add working dense text embeddings integration with default endpoint. …
timgrein Jun 23, 2025
9ca7369
Merge branch 'main' into eis-text-embedding-task-type
timgrein Jun 23, 2025
6584dab
Fix merge conflicts, compilation errors and test failures
timgrein Jun 23, 2025
9d47176
Spotless apply
timgrein Jun 23, 2025
3e8c70a
Add ElasticInferenceServiceDenseTextEmbeddingsRequestTests
timgrein Jun 23, 2025
23e7595
Add ElasticInferenceServiceDenseTextEmbeddingsRequestEntityTests
timgrein Jun 23, 2025
5af7516
Add "-v1" to multilingual-embed
timgrein Jun 23, 2025
fddfd9d
Add ElasticInferenceServiceDenseTextEmbeddingsServiceSettingsTests.java
timgrein Jun 23, 2025
9b48dfb
Add dense text embedding test cases to ElasticInferenceServiceActionC…
timgrein Jun 23, 2025
dbdadbe
[CI] Auto commit changes from spotless
Jun 23, 2025
e2f872e
Add ElasticInferenceServiceDenseTextEmbeddingsResponseEntityTests
timgrein Jun 23, 2025
485dd89
Merge remote-tracking branch 'origin/eis-text-embedding-task-type' in…
timgrein Jun 23, 2025
172070a
Merge branch 'main' into eis-text-embedding-task-type
timgrein Jun 23, 2025
6a35870
Fix compilation error after resolving merge conflict and spotlessApply
timgrein Jun 23, 2025
a8b604b
Merge branch 'main' into eis-text-embedding-task-type
brendan-jugan-elastic Jun 23, 2025
3b486b7
remove dimensions_set_by_user
brendan-jugan-elastic Jun 23, 2025
6ffcc22
Merge branch 'main' into eis-text-embedding-task-type
brendan-jugan-elastic Jun 23, 2025
3489a09
[CI] Auto commit changes from spotless
Jun 23, 2025
fb5dbc0
fix checkstyle
brendan-jugan-elastic Jun 23, 2025
1dcbcab
fix checkstyle
brendan-jugan-elastic Jun 23, 2025
dc6f320
[CI] Auto commit changes from spotless
Jun 23, 2025
087d4e5
use ConstructingObjectParser for response parsing
brendan-jugan-elastic Jun 24, 2025
cd3e116
[CI] Auto commit changes from spotless
Jun 24, 2025
aa24341
Merge branch 'main' into eis-text-embedding-task-type
timgrein Jun 24, 2025
7269c51
Some cleanup (removing unused vars etc.)
timgrein Jun 24, 2025
220e208
Fix integration test
timgrein Jun 24, 2025
27ca440
Do not set usage context, if it's null
timgrein Jun 24, 2025
b7d10b8
Pass through chunking settings and provide default for default endpoint
timgrein Jun 24, 2025
3164c6c
Merge branch 'main' into eis-text-embedding-task-type
timgrein Jun 24, 2025
fc11815
After merge conflict resolution clean-up
timgrein Jun 24, 2025
59f84a9
Merge branch 'main' into eis-text-embedding-task-type
timgrein Jun 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions server/src/main/java/org/elasticsearch/TransportVersions.java
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ML_INFERENCE_CUSTOM_SERVICE_EMBEDDING_BATCH_SIZE_8_19 = def(8_841_0_53);
public static final TransportVersion STREAMS_LOGS_SUPPORT_8_19 = def(8_841_0_54);
public static final TransportVersion ML_INFERENCE_CUSTOM_SERVICE_INPUT_TYPE_8_19 = def(8_841_0_55);
public static final TransportVersion ML_INFERENCE_ELASTIC_DENSE_TEXT_EMBEDDINGS_ADDED_8_19 = def(8_841_0_56);

public static final TransportVersion V_9_0_0 = def(9_000_0_09);
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
Expand Down Expand Up @@ -310,6 +311,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ML_INFERENCE_CUSTOM_SERVICE_EMBEDDING_BATCH_SIZE = def(9_103_0_00);
public static final TransportVersion STREAMS_LOGS_SUPPORT = def(9_104_0_00);
public static final TransportVersion ML_INFERENCE_CUSTOM_SERVICE_INPUT_TYPE = def(9_105_0_00);
public static final TransportVersion ML_INFERENCE_ELASTIC_DENSE_TEXT_EMBEDDINGS_ADDED = def(9_106_0_00);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public void testGetDefaultEndpoints() throws IOException {
var allModels = getAllModels();
var chatCompletionModels = getModels("_all", TaskType.CHAT_COMPLETION);

assertThat(allModels, hasSize(5));
assertThat(allModels, hasSize(6));
assertThat(chatCompletionModels, hasSize(1));

for (var model : chatCompletionModels) {
Expand All @@ -42,6 +42,7 @@ public void testGetDefaultEndpoints() throws IOException {

assertInferenceIdTaskType(allModels, ".rainbow-sprinkles-elastic", TaskType.CHAT_COMPLETION);
assertInferenceIdTaskType(allModels, ".elser-v2-elastic", TaskType.SPARSE_EMBEDDING);
assertInferenceIdTaskType(allModels, ".multilingual-embed-v1-elastic", TaskType.TEXT_EMBEDDING);
}

private static void assertInferenceIdTaskType(List<Map<String, Object>> models, String inferenceId, TaskType taskType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ private Iterable<String> providers(List<Object> services) {

public void testGetServicesWithTextEmbeddingTaskType() throws IOException {
List<Object> services = getServices(TaskType.TEXT_EMBEDDING);
assertThat(services.size(), equalTo(17));
assertThat(services.size(), equalTo(18));

var providers = providers(services);

Expand All @@ -93,6 +93,7 @@ public void testGetServicesWithTextEmbeddingTaskType() throws IOException {
"azureopenai",
"cohere",
"custom",
"elastic",
"elasticsearch",
"googleaistudio",
"googlevertexai",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ public void enqueueAuthorizeAllModelsResponse() {
{
"model_name": "elser-v2",
"task_types": ["embed/text/sparse"]
},
{
"model_name": "multilingual-embed-v1",
"task_types": ["embed/text/dense"]
}
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.inference.InferenceService;
import org.elasticsearch.inference.MinimalServiceSettings;
import org.elasticsearch.inference.Model;
Expand Down Expand Up @@ -197,6 +198,10 @@ public void testRemoves_DefaultChatCompletion_V1_WhenAuthorizationDoesNotReturnA
{
"model_name": "elser-v2",
"task_types": ["embed/text/sparse"]
},
{
"model_name": "multilingual-embed-v1",
"task_types": ["embed/text/dense"]
}
]
}
Expand All @@ -221,16 +226,33 @@ public void testRemoves_DefaultChatCompletion_V1_WhenAuthorizationDoesNotReturnA
".rainbow-sprinkles-elastic",
MinimalServiceSettings.chatCompletion(ElasticInferenceService.NAME),
service
),
new InferenceService.DefaultConfigId(
".multilingual-embed-v1-elastic",
MinimalServiceSettings.textEmbedding(
ElasticInferenceService.NAME,
ElasticInferenceService.DENSE_TEXT_EMBEDDINGS_DIMENSIONS,
ElasticInferenceService.defaultDenseTextEmbeddingsSimilarity(),
DenseVectorFieldMapper.ElementType.FLOAT
),
service
)
)
)
);
assertThat(service.supportedTaskTypes(), is(EnumSet.of(TaskType.CHAT_COMPLETION, TaskType.SPARSE_EMBEDDING)));
assertThat(
service.supportedTaskTypes(),
is(EnumSet.of(TaskType.CHAT_COMPLETION, TaskType.SPARSE_EMBEDDING, TaskType.TEXT_EMBEDDING))
);

PlainActionFuture<List<Model>> listener = new PlainActionFuture<>();
service.defaultConfigs(listener);
assertThat(listener.actionGet(TIMEOUT).get(0).getConfigurations().getInferenceEntityId(), is(".elser-v2-elastic"));
assertThat(listener.actionGet(TIMEOUT).get(1).getConfigurations().getInferenceEntityId(), is(".rainbow-sprinkles-elastic"));
assertThat(
listener.actionGet(TIMEOUT).get(2).getConfigurations().getInferenceEntityId(),
is(".multilingual-embed-v1-elastic")
);

var getModelListener = new PlainActionFuture<UnparsedModel>();
// persists the default endpoints
Expand Down Expand Up @@ -267,6 +289,16 @@ public void testRemoves_DefaultChatCompletion_V1_WhenAuthorizationDoesNotReturnA
".elser-v2-elastic",
MinimalServiceSettings.sparseEmbedding(ElasticInferenceService.NAME),
service
),
new InferenceService.DefaultConfigId(
".multilingual-embed-v1-elastic",
MinimalServiceSettings.textEmbedding(
ElasticInferenceService.NAME,
ElasticInferenceService.DENSE_TEXT_EMBEDDINGS_DIMENSIONS,
ElasticInferenceService.defaultDenseTextEmbeddingsSimilarity(),
DenseVectorFieldMapper.ElementType.FLOAT
),
service
)
)
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.external.response.elastic;

import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.XContentParserUtils;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xpack.core.inference.results.TextEmbeddingFloatResults;
import org.elasticsearch.xpack.inference.external.http.HttpResult;
import org.elasticsearch.xpack.inference.external.request.Request;

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
import static org.elasticsearch.common.xcontent.XContentParserUtils.parseList;
import static org.elasticsearch.xpack.inference.external.response.XContentUtils.moveToFirstToken;
import static org.elasticsearch.xpack.inference.external.response.XContentUtils.positionParserAtTokenAfterField;

public class ElasticInferenceServiceDenseTextEmbeddingsResponseEntity {

// Message template handed to positionParserAtTokenAfterField when locating the "data" field.
// NOTE(review): the %s placeholder is presumably filled with the missing field's name - confirm against the helper.
private static final String FAILED_TO_FIND_FIELD_TEMPLATE =
"Failed to find required field [%s] in Elastic Inference Service dense text embeddings response";

/**
* Parses the Elastic Inference Service Dense Text Embeddings response.
*
* For a request like:
*
* <pre>
* <code>
* {
* "inputs": ["Embed this text", "Embed this text, too"]
* }
* </code>
* </pre>
*
* The response would look like:
*
* <pre>
* <code>
* {
* "data": [
* [
* 2.1259406,
* 1.7073475,
* 0.9020516
* ],
* (...)
* ],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I vaguely remembered Tim's thread on this a couple weeks ago, but should we revisit the response format? Looking at OpenAI, Alibaba, and Mixedbread as quick references, it looks like they return a list of objects. I don't have a strong preference, but just wanted to bring this up since we might be differing from others here and wanted to confirm that this is what we want.
Thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Answered in the thread

* "meta": {
* "usage": {...}
* }
* }
* </code>
* </pre>
*/

public static TextEmbeddingFloatResults fromResponse(Request request, HttpResult response) throws IOException {
var parserConfig = XContentParserConfiguration.EMPTY.withDeprecationHandler(LoggingDeprecationHandler.INSTANCE);

try (XContentParser jsonParser = XContentFactory.xContent(XContentType.JSON).createParser(parserConfig, response.body())) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moveToFirstToken(jsonParser);

XContentParser.Token token = jsonParser.currentToken();
ensureExpectedToken(XContentParser.Token.START_OBJECT, token, jsonParser);

positionParserAtTokenAfterField(jsonParser, "data", FAILED_TO_FIND_FIELD_TEMPLATE);

List<TextEmbeddingFloatResults.Embedding> parsedEmbeddings = parseList(
jsonParser,
(parser, index) -> ElasticInferenceServiceDenseTextEmbeddingsResponseEntity.parseTextEmbeddingObject(parser)
);

if (parsedEmbeddings.isEmpty()) {
return new TextEmbeddingFloatResults(Collections.emptyList());
}

return new TextEmbeddingFloatResults(parsedEmbeddings);
}
}

/**
 * Reads a single embedding from the parser by collecting every element of the current
 * list via {@code parseList} and wrapping the values in an {@code Embedding}.
 *
 * @param parser the parser to read the embedding values from
 * @return the embedding built from the parsed float values
 * @throws IOException if reading from the parser fails
 */
private static TextEmbeddingFloatResults.Embedding parseTextEmbeddingObject(XContentParser parser) throws IOException {
    final List<Float> components = parseList(parser, valueParser -> parseEmbeddingFloatValueList(valueParser));
    return TextEmbeddingFloatResults.Embedding.of(components);
}

/**
 * Reads the number at the parser's current token as a {@code float}.
 * Despite the name, this handles one value per call; it is invoked once per list element.
 *
 * @param parser the parser whose current token is expected to be {@code VALUE_NUMBER}
 * @return the current token's value as a float
 * @throws IOException if reading from the parser fails
 */
private static float parseEmbeddingFloatValueList(XContentParser parser) throws IOException {
    // Verify the current token is numeric before converting it.
    ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, parser.currentToken(), parser);
    final float component = parser.floatValue();
    return component;
}

/** Private so the class cannot be instantiated from outside; its visible members are all static. */
private ElasticInferenceServiceDenseTextEmbeddingsResponseEntity() {
    // intentionally empty
}
}
Loading