Merged

51 commits
85478cf
Refactor Hugging Face service settings and completion request methods…
Jan-Kazlouski-elastic Jun 23, 2025
727fd8e
Add Llama model support for embeddings and chat completions
Jan-Kazlouski-elastic Jun 26, 2025
cc14b18
Refactor Llama request classes to improve secret settings handling
Jan-Kazlouski-elastic Jun 26, 2025
ceef95a
Refactor DeltaParser in LlamaStreamingProcessor to improve argument h…
Jan-Kazlouski-elastic Jun 29, 2025
55d9014
Enhance Llama streaming processing by adding support for nullable obj…
Jan-Kazlouski-elastic Jul 1, 2025
a83b0b7
[CI] Auto commit changes from spotless
Jul 1, 2025
852aa19
Merge remote-tracking branch 'origin/main' into feature/llama-embedin…
Jan-Kazlouski-elastic Jul 1, 2025
d6b53c3
Fix error messages in LlamaActionCreator
Jan-Kazlouski-elastic Jul 1, 2025
6ce4b09
[CI] Auto commit changes from spotless
Jul 1, 2025
8e7ca13
Add detailed Javadoc comments to Llama classes for improved documenta…
Jan-Kazlouski-elastic Jul 1, 2025
604d441
Enhance LlamaChatCompletionResponseHandler to support mid-stream erro…
Jan-Kazlouski-elastic Jul 1, 2025
74fd6e8
Add Javadoc comments to Llama classes for improved documentation and …
Jan-Kazlouski-elastic Jul 1, 2025
ac161fa
Fix checkstyle
Jan-Kazlouski-elastic Jul 1, 2025
a13020c
Update LlamaEmbeddingsRequest to use mediaTypeWithoutParameters for c…
Jan-Kazlouski-elastic Jul 2, 2025
4eade05
Add unit tests for LlamaActionCreator and related models
Jan-Kazlouski-elastic Jul 2, 2025
39c5787
Add unit tests for LlamaChatCompletionServiceSettings to validate con…
Jan-Kazlouski-elastic Jul 2, 2025
6a135c5
Add unit tests for LlamaEmbeddingsServiceSettings to validate configu…
Jan-Kazlouski-elastic Jul 2, 2025
c6fc56f
Add unit tests for LlamaEmbeddingsServiceSettings to validate various…
Jan-Kazlouski-elastic Jul 2, 2025
e2dce7c
Add unit tests for LlamaChatCompletionResponseHandler to validate err…
Jan-Kazlouski-elastic Jul 3, 2025
41591ae
Refactor Llama embedding and chat completion tests for consistency an…
Jan-Kazlouski-elastic Jul 3, 2025
4d2a5dd
Add unit tests for LlamaChatCompletionRequestEntity to validate messa…
Jan-Kazlouski-elastic Jul 3, 2025
1573d53
Add unit tests for LlamaEmbeddingsRequest to validate request creatio…
Jan-Kazlouski-elastic Jul 3, 2025
da55903
Add unit tests for LlamaEmbeddingsRequestEntity to validate XContent …
Jan-Kazlouski-elastic Jul 3, 2025
8cc8958
Add unit tests for LlamaErrorResponse to validate error handling from…
Jan-Kazlouski-elastic Jul 3, 2025
9573a48
Add unit tests for LlamaChatCompletionServiceSettings to validate con…
Jan-Kazlouski-elastic Jul 4, 2025
36ff4cd
Merge remote-tracking branch 'origin/main' into feature/llama-embedin…
Jan-Kazlouski-elastic Jul 4, 2025
c193ecf
Add tests for LlamaService request configuration validation and error…
Jan-Kazlouski-elastic Jul 5, 2025
c3baecf
Merge remote-tracking branch 'origin/main' into feature/llama-embedin…
Jan-Kazlouski-elastic Jul 7, 2025
a7e342b
Fix error message formatting in LlamaServiceTests for better localiza…
Jan-Kazlouski-elastic Jul 7, 2025
15c14d7
Merge remote-tracking branch 'origin/main' into feature/llama-embedin…
Jan-Kazlouski-elastic Jul 10, 2025
75cbf85
Refactor Llama model classes to implement accept method for action vi…
Jan-Kazlouski-elastic Jul 10, 2025
e06653b
Hide Llama service from configuration API to enhance security and red…
Jan-Kazlouski-elastic Jul 10, 2025
fe6173e
Refactor Llama model classes to remove modelId and update embedding r…
Jan-Kazlouski-elastic Jul 10, 2025
ad009c6
Refactor Llama request classes to use pattern matching for secret set…
Jan-Kazlouski-elastic Jul 10, 2025
18ee182
Update embeddings handler to use HuggingFace response entity
Jan-Kazlouski-elastic Jul 10, 2025
c2621e7
Refactor Mistral model classes to remove modelId and update rate limi…
Jan-Kazlouski-elastic Jul 10, 2025
eb60dfa
Refactor Mistral action classes to remove taskSettings parameter and …
Jan-Kazlouski-elastic Jul 10, 2025
76ddf99
Refactor Llama and Mistral models to remove taskSettings parameter an…
Jan-Kazlouski-elastic Jul 10, 2025
9100f69
Refactor Llama service tests to use Model instead of CustomModel and …
Jan-Kazlouski-elastic Jul 11, 2025
5fb9dad
Remove unused tests and imports from LlamaServiceTests
Jan-Kazlouski-elastic Jul 11, 2025
47c9cc6
Add chunking settings support to Llama embeddings model tests
Jan-Kazlouski-elastic Jul 11, 2025
34e21de
Merge remote-tracking branch 'origin/main' into feature/llama-embedin…
Jan-Kazlouski-elastic Jul 11, 2025
1c1ba1d
Merge remote-tracking branch 'origin/main' into feature/llama-embedin…
Jan-Kazlouski-elastic Jul 14, 2025
c267269
Add changelog
Jan-Kazlouski-elastic Jul 14, 2025
098849f
Merge remote-tracking branch 'origin/main' into feature/llama-embedin…
Jan-Kazlouski-elastic Jul 15, 2025
06c7bd1
Add support for version checks in Llama settings and define new trans…
Jan-Kazlouski-elastic Jul 15, 2025
33da7a9
Merge remote-tracking branch 'origin/main' into feature/llama-embedin…
Jan-Kazlouski-elastic Jul 16, 2025
28335ef
Refactor Llama model assertions and remove unused version support met…
Jan-Kazlouski-elastic Jul 16, 2025
d43d1e9
Merge remote-tracking branch 'origin/main' into feature/llama-embedin…
Jan-Kazlouski-elastic Jul 18, 2025
528a9d9
Refactor Llama service constructors to include ClusterService and imp…
Jan-Kazlouski-elastic Jul 18, 2025
c879b96
Merge remote-tracking branch 'origin/main' into feature/llama-embedin…
Jan-Kazlouski-elastic Jul 18, 2025
5 changes: 5 additions & 0 deletions docs/changelog/130092.yaml
@@ -0,0 +1,5 @@
pr: 130092
summary: "Added Llama provider support to the Inference Plugin"
area: Machine Learning
type: enhancement
issues: []
@@ -220,6 +220,27 @@ public <T> void declareField(BiConsumer<Value, T> consumer, ContextParser<Contex
}
}

/**
* Declare a field that is an array of objects or null. Used to avoid calling the consumer when used with
* {@link #optionalConstructorArg()} or {@link #constructorArg()}.
* @param consumer Consumer that will be passed as is to the {@link #declareField(BiConsumer, ContextParser, ParseField, ValueType)}.
* @param objectParser Parser that will parse the objects in the array, checking for nulls.
* @param field Field to declare.
*/
@Override
public <T> void declareObjectArrayOrNull(
BiConsumer<Value, List<T>> consumer,
ContextParser<Context, T> objectParser,
ParseField field
) {
declareField(
consumer,
(p, c) -> p.currentToken() == XContentParser.Token.VALUE_NULL ? null : parseArray(p, c, objectParser),
field,
ValueType.OBJECT_ARRAY_OR_NULL
);
}

@Override
public <T> void declareNamedObject(
BiConsumer<Value, T> consumer,
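For context, the following is a minimal, hypothetical sketch of how a ConstructingObjectParser could use the new declaration so that an explicit JSON null for an array-of-objects field parses cleanly; the Chunk and ToolCall types and the "tool_calls" field name are assumptions for illustration, not code from this PR.

    // Editorial sketch; types and field names are assumed, not taken from this change.
    private static final ConstructingObjectParser<Chunk, Void> PARSER = new ConstructingObjectParser<>(
        "chunk",
        true,
        args -> new Chunk((String) args[0], (List<ToolCall>) args[1])
    );

    static {
        PARSER.declareString(ConstructingObjectParser.constructorArg(), new ParseField("id"));
        // "tool_calls": null is accepted and yields a null argument instead of a parse failure
        PARSER.declareObjectArrayOrNull(
            ConstructingObjectParser.optionalConstructorArg(),
            (p, c) -> ToolCall.PARSER.parse(p, c),
            new ParseField("tool_calls")
        );
    }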
@@ -343,6 +343,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ESQL_CATEGORIZE_OPTIONS = def(9_122_0_00);
public static final TransportVersion ML_INFERENCE_AZURE_AI_STUDIO_RERANK_ADDED = def(9_123_0_00);
public static final TransportVersion PROJECT_STATE_REGISTRY_ENTRY = def(9_124_0_00);
public static final TransportVersion ML_INFERENCE_LLAMA_ADDED = def(9_125_0_00);

/*
* STOP! READ THIS FIRST! No, really,
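Defining the constant is only half of the change; settings classes typically consume it when deciding what can go over the wire. The fragment below is a hedged, illustrative sketch of the usual patterns and is not taken from this PR.

    // Editorial sketch: a new settings class usually reports the version that introduced it...
    @Override
    public TransportVersion getMinimalSupportedVersion() {
        return TransportVersions.ML_INFERENCE_LLAMA_ADDED;
    }

    // ...and any per-field gating follows the standard check:
    // if (out.getTransportVersion().onOrAfter(TransportVersions.ML_INFERENCE_LLAMA_ADDED)) { ... }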
@@ -121,7 +121,7 @@ public static Params withMaxTokensAndSkipStreamOptionsField(String modelId, Para
* - Key: {@link #MODEL_FIELD}, Value: modelId
* - Key: {@link #MAX_COMPLETION_TOKENS_FIELD}, Value: {@link #maxCompletionTokens()}
*/
public static Params withMaxCompletionTokensTokens(String modelId, Params params) {
public static Params withMaxCompletionTokens(String modelId, Params params) {
return new DelegatingMapParams(
Map.ofEntries(Map.entry(MODEL_ID_PARAM, modelId), Map.entry(MAX_TOKENS_PARAM, MAX_COMPLETION_TOKENS_FIELD)),
params
@@ -119,7 +119,7 @@ public void testParseAllFields() throws IOException {

assertThat(request, is(expected));
assertThat(
Strings.toString(request, UnifiedCompletionRequest.withMaxCompletionTokensTokens("gpt-4o", ToXContent.EMPTY_PARAMS)),
Strings.toString(request, UnifiedCompletionRequest.withMaxCompletionTokens("gpt-4o", ToXContent.EMPTY_PARAMS)),
is(XContentHelper.stripWhitespace(requestJson))
);
}
@@ -106,6 +106,8 @@
import org.elasticsearch.xpack.inference.services.jinaai.embeddings.JinaAIEmbeddingsTaskSettings;
import org.elasticsearch.xpack.inference.services.jinaai.rerank.JinaAIRerankServiceSettings;
import org.elasticsearch.xpack.inference.services.jinaai.rerank.JinaAIRerankTaskSettings;
import org.elasticsearch.xpack.inference.services.llama.completion.LlamaChatCompletionServiceSettings;
import org.elasticsearch.xpack.inference.services.llama.embeddings.LlamaEmbeddingsServiceSettings;
import org.elasticsearch.xpack.inference.services.mistral.completion.MistralChatCompletionServiceSettings;
import org.elasticsearch.xpack.inference.services.mistral.embeddings.MistralEmbeddingsServiceSettings;
import org.elasticsearch.xpack.inference.services.openai.completion.OpenAiChatCompletionServiceSettings;
@@ -175,6 +177,7 @@ public static List<NamedWriteableRegistry.Entry> getNamedWriteables() {
addJinaAINamedWriteables(namedWriteables);
addVoyageAINamedWriteables(namedWriteables);
addCustomNamedWriteables(namedWriteables);
addLlamaNamedWriteables(namedWriteables);

addUnifiedNamedWriteables(namedWriteables);

@@ -274,8 +277,25 @@ private static void addMistralNamedWriteables(List<NamedWriteableRegistry.Entry>
MistralChatCompletionServiceSettings::new
)
);
// no task settings for Mistral
}

// note - no task settings for Mistral embeddings...
private static void addLlamaNamedWriteables(List<NamedWriteableRegistry.Entry> namedWriteables) {
namedWriteables.add(
new NamedWriteableRegistry.Entry(
ServiceSettings.class,
LlamaEmbeddingsServiceSettings.NAME,
LlamaEmbeddingsServiceSettings::new
)
);
namedWriteables.add(
new NamedWriteableRegistry.Entry(
ServiceSettings.class,
LlamaChatCompletionServiceSettings.NAME,
LlamaChatCompletionServiceSettings::new
)
);
// no task settings for Llama
}

private static void addAzureAiStudioNamedWriteables(List<NamedWriteableRegistry.Entry> namedWriteables) {
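Registering these entries is what allows a polymorphic ServiceSettings instance to be resolved by name when it is read back off the wire; a one-line, hedged illustration (not from this PR):

    // Editorial sketch: deserialization relies on the entries registered above.
    ServiceSettings settings = in.readNamedWriteable(ServiceSettings.class);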
@@ -133,6 +133,7 @@
import org.elasticsearch.xpack.inference.services.huggingface.elser.HuggingFaceElserService;
import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxService;
import org.elasticsearch.xpack.inference.services.jinaai.JinaAIService;
import org.elasticsearch.xpack.inference.services.llama.LlamaService;
import org.elasticsearch.xpack.inference.services.mistral.MistralService;
import org.elasticsearch.xpack.inference.services.openai.OpenAiService;
import org.elasticsearch.xpack.inference.services.sagemaker.SageMakerClient;
@@ -402,6 +403,7 @@ public List<InferenceServiceExtension.Factory> getInferenceServiceFactories() {
context -> new JinaAIService(httpFactory.get(), serviceComponents.get(), context),
context -> new VoyageAIService(httpFactory.get(), serviceComponents.get(), context),
context -> new DeepSeekService(httpFactory.get(), serviceComponents.get(), context),
context -> new LlamaService(httpFactory.get(), serviceComponents.get(), context),
ElasticsearchInternalService::new,
context -> new CustomService(httpFactory.get(), serviceComponents.get(), context)
);
@@ -17,6 +17,7 @@
import org.elasticsearch.core.Tuple;
import org.elasticsearch.inference.InputType;
import org.elasticsearch.inference.Model;
import org.elasticsearch.inference.ModelConfigurations;
import org.elasticsearch.inference.SimilarityMeasure;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.rest.RestStatus;
@@ -304,6 +305,12 @@ public static String invalidSettingError(String settingName, String scope) {
return Strings.format("[%s] does not allow the setting [%s]", scope, settingName);
}

public static URI extractUri(Map<String, Object> map, String fieldName, ValidationException validationException) {
String parsedUrl = extractRequiredString(map, fieldName, ModelConfigurations.SERVICE_SETTINGS, validationException);

return convertToUri(parsedUrl, fieldName, ModelConfigurations.SERVICE_SETTINGS, validationException);
}

public static URI convertToUri(@Nullable String url, String settingName, String settingScope, ValidationException validationException) {
try {
return createOptionalUri(url);
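The new ServiceUtils.extractUri helper simply chains the existing steps: extract the required string from the service settings map, then convert it to a URI while accumulating any errors. A hedged sketch of a typical call site, with the map variable and field name assumed for illustration:

    // Editorial sketch; not a call site from this PR.
    ValidationException validationException = new ValidationException();
    URI uri = extractUri(serviceSettingsMap, URL, validationException);
    if (validationException.validationErrors().isEmpty() == false) {
        throw validationException;
    }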
@@ -28,7 +28,7 @@ public ElasticInferenceServiceUnifiedChatCompletionRequestEntity(UnifiedChatInpu
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
unifiedRequestEntity.toXContent(builder, UnifiedCompletionRequest.withMaxCompletionTokensTokens(modelId, params));
unifiedRequestEntity.toXContent(builder, UnifiedCompletionRequest.withMaxCompletionTokens(modelId, params));
builder.endObject();

return builder;
@@ -31,11 +31,10 @@
import static org.elasticsearch.xpack.inference.services.ServiceFields.MAX_INPUT_TOKENS;
import static org.elasticsearch.xpack.inference.services.ServiceFields.SIMILARITY;
import static org.elasticsearch.xpack.inference.services.ServiceFields.URL;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.convertToUri;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.createUri;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractSimilarity;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractUri;

public class HuggingFaceServiceSettings extends FilteredXContentObject implements ServiceSettings, HuggingFaceRateLimitServiceSettings {
public static final String NAME = "hugging_face_service_settings";
@@ -70,12 +69,6 @@ public static HuggingFaceServiceSettings fromMap(Map<String, Object> map, Config
return new HuggingFaceServiceSettings(uri, similarityMeasure, dims, maxInputTokens, rateLimitSettings);
}

public static URI extractUri(Map<String, Object> map, String fieldName, ValidationException validationException) {
String parsedUrl = extractRequiredString(map, fieldName, ModelConfigurations.SERVICE_SETTINGS, validationException);

return convertToUri(parsedUrl, fieldName, ModelConfigurations.SERVICE_SETTINGS, validationException);
}

private final URI uri;
private final SimilarityMeasure similarity;
private final Integer dimensions;
@@ -31,7 +31,7 @@
import static org.elasticsearch.xpack.inference.services.ServiceFields.URL;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.createUri;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalString;
import static org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceServiceSettings.extractUri;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractUri;

/**
* Settings for the Hugging Face chat completion service.
@@ -28,7 +28,7 @@

import static org.elasticsearch.xpack.inference.services.ServiceFields.MAX_INPUT_TOKENS;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.createUri;
import static org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceServiceSettings.extractUri;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractUri;

public class HuggingFaceElserServiceSettings extends FilteredXContentObject
implements
@@ -27,7 +27,7 @@
import java.util.Objects;

import static org.elasticsearch.xpack.inference.services.ServiceUtils.createUri;
import static org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceServiceSettings.extractUri;
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractUri;

public class HuggingFaceRerankServiceSettings extends FilteredXContentObject
implements
@@ -0,0 +1,89 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.services.llama;

import org.elasticsearch.inference.EmptySecretSettings;
import org.elasticsearch.inference.ModelConfigurations;
import org.elasticsearch.inference.ModelSecrets;
import org.elasticsearch.inference.SecretSettings;
import org.elasticsearch.inference.ServiceSettings;
import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
import org.elasticsearch.xpack.inference.services.RateLimitGroupingModel;
import org.elasticsearch.xpack.inference.services.llama.action.LlamaActionVisitor;
import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;
import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import java.util.Objects;

/**
* Abstract class representing a Llama model for inference.
* This class extends RateLimitGroupingModel and provides common functionality for Llama models.
*/
public abstract class LlamaModel extends RateLimitGroupingModel {
protected URI uri;
protected RateLimitSettings rateLimitSettings;

/**
* Constructor for creating a LlamaModel with specified configurations and secrets.
*
* @param configurations the model configurations
* @param secrets the secret settings for the model
*/
protected LlamaModel(ModelConfigurations configurations, ModelSecrets secrets) {
super(configurations, secrets);
}

/**
* Constructor for creating a LlamaModel from an existing model with overridden service settings.
* @param model the existing model whose configurations and secrets are reused
* @param serviceSettings the new settings for the inference service
*/
protected LlamaModel(RateLimitGroupingModel model, ServiceSettings serviceSettings) {
super(model, serviceSettings);
}

public URI uri() {
return this.uri;
}

@Override
public RateLimitSettings rateLimitSettings() {
return this.rateLimitSettings;
}

@Override
public int rateLimitGroupingHash() {
return Objects.hash(getServiceSettings().modelId(), uri, getSecretSettings());
}

// Needed for testing only
public void setURI(String newUri) {
try {
this.uri = new URI(newUri);
} catch (URISyntaxException e) {
// swallow any error
}
}

/**
* Retrieves the secret settings from the provided map of secrets.
* If the map is non-null but empty, it returns {@link EmptySecretSettings#INSTANCE}; otherwise it delegates to {@link DefaultSecretSettings#fromMap}.
* This is because a Llama deployment has no out-of-the-box security settings and can be used without authentication.
*
* @param secrets the map containing secret settings
* @return an instance of SecretSettings
*/
protected static SecretSettings retrieveSecretSettings(Map<String, Object> secrets) {
return (secrets != null && secrets.isEmpty()) ? EmptySecretSettings.INSTANCE : DefaultSecretSettings.fromMap(secrets);
}

protected abstract ExecutableAction accept(LlamaActionVisitor creator);
}
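To make the abstract contract above concrete, here is a heavily hedged sketch of the shape a subclass could take; the class name, constructor, and the visitor's create method are assumptions for illustration, not the PR's actual LlamaModel subclasses.

    // Hypothetical subclass sketch; names and the visitor method are assumed.
    public class ExampleLlamaModel extends LlamaModel {

        public ExampleLlamaModel(ModelConfigurations configurations, ModelSecrets secrets) {
            super(configurations, secrets);
            // a real subclass would also initialize 'uri' and 'rateLimitSettings' from its service settings
        }

        @Override
        protected ExecutableAction accept(LlamaActionVisitor creator) {
            return creator.create(this); // double dispatch into the action creator that builds the ExecutableAction
        }
    }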