Skip to content

Commit 74fd6e8

Browse files
Add Javadoc comments to Llama classes for improved documentation and clarity
1 parent 604d441 commit 74fd6e8

File tree

13 files changed

+310
-3
lines changed

13 files changed

+310
-3
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/LlamaModel.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,30 @@
2121
import java.util.Map;
2222
import java.util.Objects;
2323

24+
/**
25+
* Abstract class representing a Llama model for inference.
26+
* This class extends RateLimitGroupingModel and provides common functionality for Llama models.
27+
*/
2428
public abstract class LlamaModel extends RateLimitGroupingModel {
2529
protected String modelId;
2630
protected URI uri;
2731
protected RateLimitSettings rateLimitSettings;
2832

33+
/**
34+
* Constructor for creating a LlamaModel with specified configurations and secrets.
35+
*
36+
* @param configurations the model configurations
37+
* @param secrets the secret settings for the model
38+
*/
2939
protected LlamaModel(ModelConfigurations configurations, ModelSecrets secrets) {
3040
super(configurations, secrets);
3141
}
3242

43+
/**
44+
* Constructor for creating a LlamaModel from an existing model with overridden service settings.
45+
* @param model the base model to copy properties from
46+
* @param serviceSettings the settings for the inference service
47+
*/
3348
protected LlamaModel(RateLimitGroupingModel model, ServiceSettings serviceSettings) {
3449
super(model, serviceSettings);
3550
}
@@ -61,6 +76,14 @@ public void setURI(String newUri) {
6176
}
6277
}
6378

79+
/**
80+
* Retrieves the secret settings from the provided map of secrets.
81+
* If the map is non-null but empty, it returns an instance of EmptySecretSettings.
82+
* This is because the Llama model doesn't have out-of-the-box security settings and can be used without authentication.
83+
*
84+
* @param secrets the map containing secret settings
85+
* @return an instance of SecretSettings
86+
*/
6487
protected static SecretSettings retrieveSecretSettings(Map<String, Object> secrets) {
6588
return (secrets != null && secrets.isEmpty()) ? EmptySecretSettings.INSTANCE : DefaultSecretSettings.fromMap(secrets);
6689
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/LlamaService.java

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,12 @@
6868
import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull;
6969
import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap;
7070

71+
/**
72+
* LlamaService is an inference service for Llama models, supporting text embedding and chat completion tasks.
73+
* It extends SenderService to handle HTTP requests and responses for Llama models.
74+
*/
7175
public class LlamaService extends SenderService {
7276
public static final String NAME = "llama";
73-
7477
private static final String SERVICE_NAME = "Llama";
7578
/**
7679
* The optimal batch size depends on the hardware the model is deployed on.
@@ -84,6 +87,12 @@ public class LlamaService extends SenderService {
8487
OpenAiChatCompletionResponseEntity::fromResponse
8588
);
8689

90+
/**
91+
* Constructor for creating a LlamaService with specified HTTP request sender factory and service components.
92+
*
93+
* @param factory the factory to create HTTP request senders
94+
* @param serviceComponents the components required for the inference service
95+
*/
8796
public LlamaService(HttpRequestSender.Factory factory, ServiceComponents serviceComponents) {
8897
super(factory, serviceComponents);
8998
}
@@ -112,6 +121,19 @@ protected void validateInputType(InputType inputType, Model model, ValidationExc
112121
ServiceUtils.validateInputTypeIsUnspecifiedOrInternal(inputType, validationException);
113122
}
114123

124+
/**
125+
* Creates a LlamaModel based on the provided parameters.
126+
*
127+
* @param inferenceId the unique identifier for the inference entity
128+
* @param taskType the type of task this model is designed for
129+
* @param serviceSettings the settings for the inference service
130+
* @param taskSettings the settings specific to the task
131+
* @param chunkingSettings the settings for chunking, if applicable
132+
* @param secretSettings the secret settings for the model, such as API keys or tokens
133+
* @param failureMessage the message to use in case of failure
134+
* @param context the context for parsing configuration settings
135+
* @return a new instance of LlamaModel based on the provided parameters
136+
*/
115137
protected LlamaModel createModel(
116138
String inferenceId,
117139
TaskType taskType,
@@ -352,14 +374,18 @@ public TransportVersion getMinimalSupportedVersion() {
352374
return TransportVersions.ML_INFERENCE_LLAMA_ADDED;
353375
}
354376

377+
/**
378+
* Configuration class for the Llama inference service.
379+
* It provides the settings and configurations required for the service.
380+
*/
355381
public static class Configuration {
356382
public static InferenceServiceConfiguration get() {
357-
return configuration.getOrCompute();
383+
return CONFIGURATION.getOrCompute();
358384
}
359385

360386
private Configuration() {}
361387

362-
private static final LazyInitializable<InferenceServiceConfiguration, RuntimeException> configuration = new LazyInitializable<>(
388+
private static final LazyInitializable<InferenceServiceConfiguration, RuntimeException> CONFIGURATION = new LazyInitializable<>(
363389
() -> {
364390
var configurationMap = new HashMap<String, SettingsConfiguration>();
365391

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/completion/LlamaChatCompletionResponseHandler.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,19 @@ protected Exception buildMidStreamError(Request request, String message, Excepti
109109
}
110110
}
111111

112+
/**
113+
* StreamingLlamaErrorResponseEntity allows creation of {@link ErrorResponse} from a JSON string.
114+
* This entity is used to parse error responses from streaming Llama requests.
115+
* For non-streaming requests {@link LlamaErrorResponse} should be used.
116+
* Example error response for Bad Request error would look like:
117+
* <pre><code>
118+
* {
119+
* "error": {
120+
* "message": "400: Invalid value: Model 'llama3.12:3b' not found"
121+
* }
122+
* }
123+
* </code></pre>
124+
*/
112125
private static class StreamingLlamaErrorResponseEntity extends ErrorResponse {
113126
private static final ConstructingObjectParser<Optional<ErrorResponse>, Void> ERROR_PARSER = new ConstructingObjectParser<>(
114127
LLAMA_ERROR,
@@ -155,6 +168,11 @@ private static ErrorResponse fromString(String response) {
155168
return ErrorResponse.UNDEFINED_ERROR;
156169
}
157170

171+
/**
172+
* Constructs a StreamingLlamaErrorResponseEntity with the specified error message.
173+
*
174+
* @param errorMessage the error message to include in the response entity
175+
*/
158176
StreamingLlamaErrorResponseEntity(String errorMessage) {
159177
super(errorMessage);
160178
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/completion/LlamaChatCompletionServiceSettings.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString;
3333
import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractUri;
3434

35+
/**
36+
* Represents the settings for a Llama chat completion service.
37+
* This class encapsulates the model ID, URI, and rate limit settings for the Llama chat completion service.
38+
*/
3539
public class LlamaChatCompletionServiceSettings extends FilteredXContentObject implements ServiceSettings {
3640
public static final String NAME = "llama_completion_service_settings";
3741
// There is no default rate limit for Llama, so we set a reasonable default of 3000 requests per minute
@@ -41,6 +45,14 @@ public class LlamaChatCompletionServiceSettings extends FilteredXContentObject i
4145
private final URI uri;
4246
private final RateLimitSettings rateLimitSettings;
4347

48+
/**
49+
* Creates a new instance of LlamaChatCompletionServiceSettings from a map of settings.
50+
*
51+
* @param map the map containing the service settings
52+
* @param context the context for parsing configuration settings
53+
* @return a new instance of LlamaChatCompletionServiceSettings
54+
* @throws ValidationException if required fields are missing or invalid
55+
*/
4456
public static LlamaChatCompletionServiceSettings fromMap(Map<String, Object> map, ConfigurationParseContext context) {
4557
ValidationException validationException = new ValidationException();
4658

@@ -61,18 +73,38 @@ public static LlamaChatCompletionServiceSettings fromMap(Map<String, Object> map
6173
return new LlamaChatCompletionServiceSettings(model, uri, rateLimitSettings);
6274
}
6375

76+
/**
77+
* Constructs a new LlamaChatCompletionServiceSettings from a StreamInput.
78+
*
79+
* @param in the StreamInput to read from
80+
* @throws IOException if an I/O error occurs during reading
81+
*/
6482
public LlamaChatCompletionServiceSettings(StreamInput in) throws IOException {
6583
this.modelId = in.readString();
6684
this.uri = createUri(in.readString());
6785
this.rateLimitSettings = new RateLimitSettings(in);
6886
}
6987

88+
/**
89+
* Constructs a new LlamaChatCompletionServiceSettings with the specified model ID, URI, and rate limit settings.
90+
*
91+
* @param modelId the ID of the model
92+
* @param uri the URI of the service
93+
* @param rateLimitSettings the rate limit settings for the service
94+
*/
7095
public LlamaChatCompletionServiceSettings(String modelId, URI uri, @Nullable RateLimitSettings rateLimitSettings) {
7196
this.modelId = modelId;
7297
this.uri = uri;
7398
this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
7499
}
75100

101+
/**
102+
* Constructs a new LlamaChatCompletionServiceSettings with the specified model ID and URL.
103+
* If the provided rate limit settings are null, the default value is used.
104+
*
105+
* @param modelId the ID of the model
106+
* @param url the URL of the service
* @param rateLimitSettings the rate limit settings for the service, or null to use the default
107+
*/
76108
public LlamaChatCompletionServiceSettings(String modelId, String url, @Nullable RateLimitSettings rateLimitSettings) {
77109
this(modelId, createUri(url), rateLimitSettings);
78110
}
@@ -92,10 +124,20 @@ public String modelId() {
92124
return this.modelId;
93125
}
94126

127+
/**
128+
* Returns the URI of the Llama chat completion service.
129+
*
130+
* @return the URI of the service
131+
*/
95132
public URI uri() {
96133
return this.uri;
97134
}
98135

136+
/**
137+
* Returns the rate limit settings for the Llama chat completion service.
138+
*
139+
* @return the rate limit settings
140+
*/
99141
public RateLimitSettings rateLimitSettings() {
100142
return this.rateLimitSettings;
101143
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/embeddings/LlamaEmbeddingsModel.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,22 @@
2121

2222
import java.util.Map;
2323

24+
/**
25+
* Represents a Llama embeddings model for inference.
26+
* This class extends the LlamaModel and provides specific configurations and settings for embeddings tasks.
27+
*/
2428
public class LlamaEmbeddingsModel extends LlamaModel {
2529

30+
/**
31+
* Constructor for creating a LlamaEmbeddingsModel with specified parameters.
32+
*
33+
* @param inferenceEntityId the unique identifier for the inference entity
34+
* @param taskType the type of task this model is designed for
35+
* @param service the name of the inference service
36+
* @param serviceSettings the settings for the inference service, specific to embeddings
37+
* @param secrets the secret settings for the model, such as API keys or tokens
38+
* @param context the context for parsing configuration settings
39+
*/
2640
public LlamaEmbeddingsModel(
2741
String inferenceEntityId,
2842
TaskType taskType,
@@ -44,17 +58,39 @@ public LlamaEmbeddingsModel(
4458
);
4559
}
4660

61+
/**
62+
* Constructor for creating a LlamaEmbeddingsModel with specified parameters.
63+
*
64+
* @param model the base LlamaEmbeddingsModel to copy properties from
65+
* @param serviceSettings the settings for the inference service, specific to embeddings
66+
*/
4767
public LlamaEmbeddingsModel(LlamaEmbeddingsModel model, LlamaEmbeddingsServiceSettings serviceSettings) {
4868
super(model, serviceSettings);
4969
setPropertiesFromServiceSettings(serviceSettings);
5070
}
5171

72+
/**
73+
* Sets properties from the provided LlamaEmbeddingsServiceSettings.
74+
*
75+
* @param serviceSettings the service settings to extract properties from
76+
*/
5277
private void setPropertiesFromServiceSettings(LlamaEmbeddingsServiceSettings serviceSettings) {
5378
this.modelId = serviceSettings.modelId();
5479
this.uri = serviceSettings.uri();
5580
this.rateLimitSettings = serviceSettings.rateLimitSettings();
5681
}
5782

83+
/**
84+
* Constructor for creating a LlamaEmbeddingsModel with specified parameters.
85+
*
86+
* @param inferenceEntityId the unique identifier for the inference entity
87+
* @param taskType the type of task this model is designed for
88+
* @param service the name of the inference service
89+
* @param serviceSettings the settings for the inference service, specific to embeddings
90+
* @param taskSettings the task settings for the model
91+
* @param chunkingSettings the chunking settings for processing input data
92+
* @param secrets the secret settings for the model, such as API keys or tokens
93+
*/
5894
public LlamaEmbeddingsModel(
5995
String inferenceEntityId,
6096
TaskType taskType,
@@ -76,6 +112,12 @@ public LlamaEmbeddingsServiceSettings getServiceSettings() {
76112
return (LlamaEmbeddingsServiceSettings) super.getServiceSettings();
77113
}
78114

115+
/**
116+
* Accepts a visitor to create an executable action for this Llama embeddings model.
117+
*
118+
* @param creator the visitor that creates the executable action
119+
* @return an ExecutableAction representing the Llama embeddings model
120+
*/
79121
public ExecutableAction accept(LlamaActionVisitor creator) {
80122
return creator.create(this);
81123
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/llama/embeddings/LlamaEmbeddingsResponseHandler.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,18 @@
1111
import org.elasticsearch.xpack.inference.services.llama.response.LlamaErrorResponse;
1212
import org.elasticsearch.xpack.inference.services.openai.OpenAiResponseHandler;
1313

14+
/**
15+
* Handles responses for Llama embeddings requests, parsing the response and handling errors.
16+
* This class extends OpenAiResponseHandler to provide specific functionality for Llama embeddings.
17+
*/
1418
public class LlamaEmbeddingsResponseHandler extends OpenAiResponseHandler {
1519

20+
/**
21+
* Constructs a new LlamaEmbeddingsResponseHandler with the specified request type and response parser.
22+
*
23+
* @param requestType the type of request this handler will process
24+
* @param parseFunction the function to parse the response
25+
*/
1626
public LlamaEmbeddingsResponseHandler(String requestType, ResponseParser parseFunction) {
1727
super(requestType, parseFunction, LlamaErrorResponse::fromResponse, false);
1828
}

0 commit comments

Comments
 (0)