Skip to content

Commit 74fd6e8

Browse files
Add Javadoc comments to Llama classes for improved documentation and clarity
1 parent 604d441 commit 74fd6e8

File tree

13 files changed

+310
-3
lines changed

13 files changed

+310
-3
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/LlamaModel.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,30 @@
2121
import java.util.Map;
2222
import java.util.Objects;
2323

24+
/**
25+
* Abstract class representing a Llama model for inference.
26+
* This class extends RateLimitGroupingModel and provides common functionality for Llama models.
27+
*/
2428
public abstract class LlamaModel extends RateLimitGroupingModel {
2529
protected String modelId;
2630
protected URI uri;
2731
protected RateLimitSettings rateLimitSettings;
2832

33+
/**
34+
* Constructor for creating a LlamaModel with specified configurations and secrets.
35+
*
36+
* @param configurations the model configurations
37+
* @param secrets the secret settings for the model
38+
*/
2939
protected LlamaModel(ModelConfigurations configurations, ModelSecrets secrets) {
3040
super(configurations, secrets);
3141
}
3242

43+
/**
44+
* Constructor for creating a LlamaModel from an existing model with overridden service settings.
45+
* @param model the base model to copy properties from
46+
* @param serviceSettings the settings for the inference service
47+
*/
3348
protected LlamaModel(RateLimitGroupingModel model, ServiceSettings serviceSettings) {
3449
super(model, serviceSettings);
3550
}
@@ -61,6 +76,14 @@ public void setURI(String newUri) {
6176
}
6277
}
6378

79+
/**
80+
* Retrieves the secret settings from the provided map of secrets.
81+
* If the map is non-null but empty, it returns an instance of EmptySecretSettings.
82+
* This is because the Llama model doesn't have out-of-the-box security settings and can be used without authentication.
83+
*
84+
* @param secrets the map containing secret settings
85+
* @return an instance of SecretSettings
86+
*/
6487
protected static SecretSettings retrieveSecretSettings(Map<String, Object> secrets) {
6588
return (secrets != null && secrets.isEmpty()) ? EmptySecretSettings.INSTANCE : DefaultSecretSettings.fromMap(secrets);
6689
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/LlamaService.java

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,12 @@
6868
import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull;
6969
import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap;
7070

71+
/**
72+
* LlamaService is an inference service for Llama models, supporting text embedding and chat completion tasks.
73+
* It extends SenderService to handle HTTP requests and responses for Llama models.
74+
*/
7175
public class LlamaService extends SenderService {
7276
public static final String NAME = "llama";
73-
7477
private static final String SERVICE_NAME = "Llama";
7578
/**
7679
* The optimal batch size depends on the hardware the model is deployed on.
@@ -84,6 +87,12 @@ public class LlamaService extends SenderService {
8487
OpenAiChatCompletionResponseEntity::fromResponse
8588
);
8689

90+
/**
91+
* Constructor for creating a LlamaService with specified HTTP request sender factory and service components.
92+
*
93+
* @param factory the factory to create HTTP request senders
94+
* @param serviceComponents the components required for the inference service
95+
*/
8796
public LlamaService(HttpRequestSender.Factory factory, ServiceComponents serviceComponents) {
8897
super(factory, serviceComponents);
8998
}
@@ -112,6 +121,19 @@ protected void validateInputType(InputType inputType, Model model, ValidationExc
112121
ServiceUtils.validateInputTypeIsUnspecifiedOrInternal(inputType, validationException);
113122
}
114123

124+
/**
125+
* Creates a LlamaModel based on the provided parameters.
126+
*
127+
* @param inferenceId the unique identifier for the inference entity
128+
* @param taskType the type of task this model is designed for
129+
* @param serviceSettings the settings for the inference service
130+
* @param taskSettings the settings specific to the task
131+
* @param chunkingSettings the settings for chunking, if applicable
132+
* @param secretSettings the secret settings for the model, such as API keys or tokens
133+
* @param failureMessage the message to use in case of failure
134+
* @param context the context for parsing configuration settings
135+
* @return a new instance of LlamaModel based on the provided parameters
136+
*/
115137
protected LlamaModel createModel(
116138
String inferenceId,
117139
TaskType taskType,
@@ -352,14 +374,18 @@ public TransportVersion getMinimalSupportedVersion() {
352374
return TransportVersions.ML_INFERENCE_LLAMA_ADDED;
353375
}
354376

377+
/**
378+
* Configuration class for the Llama inference service.
379+
* It provides the settings and configurations required for the service.
380+
*/
355381
public static class Configuration {
356382
public static InferenceServiceConfiguration get() {
357-
return configuration.getOrCompute();
383+
return CONFIGURATION.getOrCompute();
358384
}
359385

360386
private Configuration() {}
361387

362-
private static final LazyInitializable<InferenceServiceConfiguration, RuntimeException> configuration = new LazyInitializable<>(
388+
private static final LazyInitializable<InferenceServiceConfiguration, RuntimeException> CONFIGURATION = new LazyInitializable<>(
363389
() -> {
364390
var configurationMap = new HashMap<String, SettingsConfiguration>();
365391

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/completion/LlamaChatCompletionResponseHandler.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,19 @@ protected Exception buildMidStreamError(Request request, String message, Excepti
109109
}
110110
}
111111

112+
/**
113+
* StreamingLlamaErrorResponseEntity allows creation of {@link ErrorResponse} from a JSON string.
114+
* This entity is used to parse error responses from streaming Llama requests.
115+
* For non-streaming requests {@link LlamaErrorResponse} should be used.
116+
* Example error response for Bad Request error would look like:
117+
* <pre><code>
118+
* {
119+
* "error": {
120+
* "message": "400: Invalid value: Model 'llama3.12:3b' not found"
121+
* }
122+
* }
123+
* </code></pre>
124+
*/
112125
private static class StreamingLlamaErrorResponseEntity extends ErrorResponse {
113126
private static final ConstructingObjectParser<Optional<ErrorResponse>, Void> ERROR_PARSER = new ConstructingObjectParser<>(
114127
LLAMA_ERROR,
@@ -155,6 +168,11 @@ private static ErrorResponse fromString(String response) {
155168
return ErrorResponse.UNDEFINED_ERROR;
156169
}
157170

171+
/**
172+
* Constructs a StreamingLlamaErrorResponseEntity with the specified error message.
173+
*
174+
* @param errorMessage the error message to include in the response entity
175+
*/
158176
StreamingLlamaErrorResponseEntity(String errorMessage) {
159177
super(errorMessage);
160178
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/completion/LlamaChatCompletionServiceSettings.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString;
3333
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractUri;
3434

35+
/**
36+
* Represents the settings for a Llama chat completion service.
37+
* This class encapsulates the model ID, URI, and rate limit settings for the Llama chat completion service.
38+
*/
3539
public class LlamaChatCompletionServiceSettings extends FilteredXContentObject implements ServiceSettings {
3640
public static final String NAME = "llama_completion_service_settings";
3741
// There is no default rate limit for Llama, so we set a reasonable default of 3000 requests per minute
@@ -41,6 +45,14 @@ public class LlamaChatCompletionServiceSettings extends FilteredXContentObject i
4145
private final URI uri;
4246
private final RateLimitSettings rateLimitSettings;
4347

48+
/**
49+
* Creates a new instance of LlamaChatCompletionServiceSettings from a map of settings.
50+
*
51+
* @param map the map containing the service settings
52+
* @param context the context for parsing configuration settings
53+
* @return a new instance of LlamaChatCompletionServiceSettings
54+
* @throws ValidationException if required fields are missing or invalid
55+
*/
4456
public static LlamaChatCompletionServiceSettings fromMap(Map<String, Object> map, ConfigurationParseContext context) {
4557
ValidationException validationException = new ValidationException();
4658

@@ -61,18 +73,38 @@ public static LlamaChatCompletionServiceSettings fromMap(Map<String, Object> map
6173
return new LlamaChatCompletionServiceSettings(model, uri, rateLimitSettings);
6274
}
6375

76+
/**
77+
* Constructs a new LlamaChatCompletionServiceSettings from a StreamInput.
78+
*
79+
* @param in the StreamInput to read from
80+
* @throws IOException if an I/O error occurs during reading
81+
*/
6482
public LlamaChatCompletionServiceSettings(StreamInput in) throws IOException {
6583
this.modelId = in.readString();
6684
this.uri = createUri(in.readString());
6785
this.rateLimitSettings = new RateLimitSettings(in);
6886
}
6987

88+
/**
89+
* Constructs a new LlamaChatCompletionServiceSettings with the specified model ID, URI, and rate limit settings.
90+
*
91+
* @param modelId the ID of the model
92+
* @param uri the URI of the service
93+
* @param rateLimitSettings the rate limit settings for the service
94+
*/
7095
public LlamaChatCompletionServiceSettings(String modelId, URI uri, @Nullable RateLimitSettings rateLimitSettings) {
7196
this.modelId = modelId;
7297
this.uri = uri;
7398
this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
7499
}
75100

101+
/**
102+
* Constructs a new LlamaChatCompletionServiceSettings with the specified model ID and URL.
103+
* If the provided rate limit settings are null, the default value is used.
104+
*
105+
* @param modelId the ID of the model
106+
* @param url the URL of the service
* @param rateLimitSettings the rate limit settings for the service, or null to use the default
107+
*/
76108
public LlamaChatCompletionServiceSettings(String modelId, String url, @Nullable RateLimitSettings rateLimitSettings) {
77109
this(modelId, createUri(url), rateLimitSettings);
78110
}
@@ -92,10 +124,20 @@ public String modelId() {
92124
return this.modelId;
93125
}
94126

127+
/**
128+
* Returns the URI of the Llama chat completion service.
129+
*
130+
* @return the URI of the service
131+
*/
95132
public URI uri() {
96133
return this.uri;
97134
}
98135

136+
/**
137+
* Returns the rate limit settings for the Llama chat completion service.
138+
*
139+
* @return the rate limit settings
140+
*/
99141
public RateLimitSettings rateLimitSettings() {
100142
return this.rateLimitSettings;
101143
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/embeddings/LlamaEmbeddingsModel.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,22 @@
2121

2222
import java.util.Map;
2323

24+
/**
25+
* Represents a Llama embeddings model for inference.
26+
* This class extends the LlamaModel and provides specific configurations and settings for embeddings tasks.
27+
*/
2428
public class LlamaEmbeddingsModel extends LlamaModel {
2529

30+
/**
31+
* Constructor for creating a LlamaEmbeddingsModel with specified parameters.
32+
*
33+
* @param inferenceEntityId the unique identifier for the inference entity
34+
* @param taskType the type of task this model is designed for
35+
* @param service the name of the inference service
36+
* @param serviceSettings the settings for the inference service, specific to embeddings
37+
* @param secrets the secret settings for the model, such as API keys or tokens
38+
* @param context the context for parsing configuration settings
39+
*/
2640
public LlamaEmbeddingsModel(
2741
String inferenceEntityId,
2842
TaskType taskType,
@@ -44,17 +58,39 @@ public LlamaEmbeddingsModel(
4458
);
4559
}
4660

61+
/**
62+
* Constructor for creating a LlamaEmbeddingsModel with specified parameters.
63+
*
64+
* @param model the base LlamaEmbeddingsModel to copy properties from
65+
* @param serviceSettings the settings for the inference service, specific to embeddings
66+
*/
4767
public LlamaEmbeddingsModel(LlamaEmbeddingsModel model, LlamaEmbeddingsServiceSettings serviceSettings) {
4868
super(model, serviceSettings);
4969
setPropertiesFromServiceSettings(serviceSettings);
5070
}
5171

72+
/**
73+
* Sets properties from the provided LlamaEmbeddingsServiceSettings.
74+
*
75+
* @param serviceSettings the service settings to extract properties from
76+
*/
5277
private void setPropertiesFromServiceSettings(LlamaEmbeddingsServiceSettings serviceSettings) {
5378
this.modelId = serviceSettings.modelId();
5479
this.uri = serviceSettings.uri();
5580
this.rateLimitSettings = serviceSettings.rateLimitSettings();
5681
}
5782

83+
/**
84+
* Constructor for creating a LlamaEmbeddingsModel with specified parameters.
85+
*
86+
* @param inferenceEntityId the unique identifier for the inference entity
87+
* @param taskType the type of task this model is designed for
88+
* @param service the name of the inference service
89+
* @param serviceSettings the settings for the inference service, specific to embeddings
90+
* @param taskSettings the task settings for the model
91+
* @param chunkingSettings the chunking settings for processing input data
92+
* @param secrets the secret settings for the model, such as API keys or tokens
93+
*/
5894
public LlamaEmbeddingsModel(
5995
String inferenceEntityId,
6096
TaskType taskType,
@@ -76,6 +112,12 @@ public LlamaEmbeddingsServiceSettings getServiceSettings() {
76112
return (LlamaEmbeddingsServiceSettings) super.getServiceSettings();
77113
}
78114

115+
/**
116+
* Accepts a visitor to create an executable action for this Llama embeddings model.
117+
*
118+
* @param creator the visitor that creates the executable action
119+
* @return an ExecutableAction representing the Llama embeddings model
120+
*/
79121
public ExecutableAction accept(LlamaActionVisitor creator) {
80122
return creator.create(this);
81123
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/embeddings/LlamaEmbeddingsResponseHandler.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,18 @@
1111
import org.elasticsearch.xpack.inference.services.llama.response.LlamaErrorResponse;
1212
import org.elasticsearch.xpack.inference.services.openai.OpenAiResponseHandler;
1313

14+
/**
15+
* Handles responses for Llama embeddings requests, parsing the response and handling errors.
16+
* This class extends OpenAiResponseHandler to provide specific functionality for Llama embeddings.
17+
*/
1418
public class LlamaEmbeddingsResponseHandler extends OpenAiResponseHandler {
1519

20+
/**
21+
* Constructs a new LlamaEmbeddingsResponseHandler with the specified request type and response parser.
22+
*
23+
* @param requestType the type of request this handler will process
24+
* @param parseFunction the function to parse the response
25+
*/
1626
public LlamaEmbeddingsResponseHandler(String requestType, ResponseParser parseFunction) {
1727
super(requestType, parseFunction, LlamaErrorResponse::fromResponse, false);
1828
}

0 commit comments

Comments
 (0)