elastic
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceEmbeddingsRequestManager.java‎
Lines changed: 0 additions & 88 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceEmbeddingsRequestManager.java‎
Lines changed: 0 additions & 88 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceRequestManager.java‎
Lines changed: 56 additions & 2 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceRequestManager.java‎
Lines changed: 56 additions & 2 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/action/HuggingFaceActionCreator.java‎
Lines changed: 3 additions & 3 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/action/HuggingFaceActionCreator.java‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/completion/HuggingFaceChatCompletionServiceSettings.java‎
Lines changed: 18 additions & 17 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/completion/HuggingFaceChatCompletionServiceSettings.java‎
Lines changed: 18 additions & 17 deletions
diff --git a/‎x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceTests.java‎
Lines changed: 4 additions & 4 deletions b/‎x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceTests.java‎
Lines changed: 4 additions & 4 deletions
@@ -7,12 +7,66 @@
 
 package org.elasticsearch.xpack.inference.services.huggingface;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.inference.InferenceServiceResults;
 import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.xpack.inference.common.Truncator;
+import org.elasticsearch.xpack.inference.external.http.retry.RequestSender;
+import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;
 import org.elasticsearch.xpack.inference.external.http.sender.BaseRequestManager;
+import org.elasticsearch.xpack.inference.external.http.sender.EmbeddingsInput;
+import org.elasticsearch.xpack.inference.external.http.sender.ExecutableInferenceRequest;
+import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs;
+import org.elasticsearch.xpack.inference.services.huggingface.request.embeddings.HuggingFaceEmbeddingsRequest;
 
-public abstract class HuggingFaceRequestManager extends BaseRequestManager {
-    protected HuggingFaceRequestManager(HuggingFaceModel model, ThreadPool threadPool) {
+import java.util.List;
+import java.util.Objects;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.xpack.inference.common.Truncator.truncate;
+
+public class HuggingFaceRequestManager extends BaseRequestManager {
+    private static final Logger logger = LogManager.getLogger(HuggingFaceRequestManager.class);
+
+    public static HuggingFaceRequestManager of(
+        HuggingFaceModel model,
+        ResponseHandler responseHandler,
+        Truncator truncator,
+        ThreadPool threadPool
+    ) {
+        return new HuggingFaceRequestManager(
+            Objects.requireNonNull(model),
+            Objects.requireNonNull(responseHandler),
+            Objects.requireNonNull(truncator),
+            Objects.requireNonNull(threadPool)
+        );
+    }
+
+    private final HuggingFaceModel model;
+    private final ResponseHandler responseHandler;
+    private final Truncator truncator;
+
+    private HuggingFaceRequestManager(HuggingFaceModel model, ResponseHandler responseHandler, Truncator truncator, ThreadPool threadPool) {
         super(threadPool, model.getInferenceEntityId(), RateLimitGrouping.of(model), model.rateLimitServiceSettings().rateLimitSettings());
+        this.model = model;
+        this.responseHandler = responseHandler;
+        this.truncator = truncator;
+    }
+
+    @Override
+    public void execute(
+        InferenceInputs inferenceInputs,
+        RequestSender requestSender,
+        Supplier<Boolean> hasRequestCompletedFunction,
+        ActionListener<InferenceServiceResults> listener
+    ) {
+        List<String> docsInput = EmbeddingsInput.of(inferenceInputs).getStringInputs();
+        var truncatedInput = truncate(docsInput, model.getTokenLimit());
+        var request = new HuggingFaceEmbeddingsRequest(truncator, truncatedInput, model);
+
+        execute(new ExecutableInferenceRequest(requestSender, logger, request, responseHandler, hasRequestCompletedFunction, listener));
     }
 
     record RateLimitGrouping(int accountHash) {
 
@@ -16,7 +16,7 @@
 import org.elasticsearch.xpack.inference.external.http.sender.Sender;
 import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;
 import org.elasticsearch.xpack.inference.services.ServiceComponents;
-import org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceEmbeddingsRequestManager;
+import org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceRequestManager;
 import org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceResponseHandler;
 import org.elasticsearch.xpack.inference.services.huggingface.completion.HuggingFaceChatCompletionModel;
 import org.elasticsearch.xpack.inference.services.huggingface.elser.HuggingFaceElserModel;
@@ -58,7 +58,7 @@ public ExecutableAction create(HuggingFaceEmbeddingsModel model) {
             "hugging face text embeddings",
             HuggingFaceEmbeddingsResponseEntity::fromResponse
         );
-        var requestCreator = HuggingFaceEmbeddingsRequestManager.of(
+        var requestCreator = HuggingFaceRequestManager.of(
             model,
             responseHandler,
             serviceComponents.truncator(),
@@ -71,7 +71,7 @@ public ExecutableAction create(HuggingFaceEmbeddingsModel model) {
     @Override
     public ExecutableAction create(HuggingFaceElserModel model) {
         var responseHandler = new HuggingFaceResponseHandler("hugging face elser", HuggingFaceElserResponseEntity::fromResponse);
-        var requestCreator = HuggingFaceEmbeddingsRequestManager.of(
+        var requestCreator = HuggingFaceRequestManager.of(
             model,
             responseHandler,
             serviceComponents.truncator(),
 
@@ -29,9 +29,10 @@
 
 import static org.elasticsearch.xpack.inference.services.ServiceFields.MAX_INPUT_TOKENS;
 import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID;
+import static org.elasticsearch.xpack.inference.services.ServiceFields.URL;
 import static org.elasticsearch.xpack.inference.services.ServiceUtils.createUri;
 import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger;
-import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString;
+import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalString;
 import static org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceServiceSettings.extractUri;
 
 /**
@@ -47,11 +48,9 @@ public class HuggingFaceChatCompletionServiceSettings extends FilteredXContentOb
         HuggingFaceRateLimitServiceSettings {
 
     public static final String NAME = "hugging_face_completion_service_settings";
-    public static final String URL = "url";
     // At the time of writing HuggingFace hasn't posted the default rate limit for inference endpoints so the value here is only a guess
     // 3000 requests per minute
     private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(3000);
-    private static final int DEFAULT_TOKEN_LIMIT = 512;
 
     /**
      * Creates a new instance of {@link HuggingFaceChatCompletionServiceSettings} from a map of settings.
@@ -62,7 +61,7 @@ public class HuggingFaceChatCompletionServiceSettings extends FilteredXContentOb
     public static HuggingFaceChatCompletionServiceSettings fromMap(Map<String, Object> map, ConfigurationParseContext context) {
         ValidationException validationException = new ValidationException();
 
-        String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
+        String modelId = extractOptionalString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
 
         var uri = extractUri(map, URL, validationException);
 
@@ -93,7 +92,7 @@ public static HuggingFaceChatCompletionServiceSettings fromMap(Map<String, Objec
     private final RateLimitSettings rateLimitSettings;
 
     public HuggingFaceChatCompletionServiceSettings(
-        String modelId,
+        @Nullable String modelId,
         String url,
         @Nullable Integer maxInputTokens,
         @Nullable RateLimitSettings rateLimitSettings
@@ -102,14 +101,14 @@ public HuggingFaceChatCompletionServiceSettings(
     }
 
     public HuggingFaceChatCompletionServiceSettings(
-        String modelId,
+        @Nullable String modelId,
         URI uri,
         @Nullable Integer maxInputTokens,
         @Nullable RateLimitSettings rateLimitSettings
     ) {
         this.modelId = modelId;
         this.uri = uri;
-        this.maxInputTokens = Objects.requireNonNullElse(maxInputTokens, DEFAULT_TOKEN_LIMIT);
+        this.maxInputTokens = maxInputTokens;
         this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
     }
 
@@ -119,15 +118,14 @@ public HuggingFaceChatCompletionServiceSettings(
      * @throws IOException if an I/O error occurs
      */
     public HuggingFaceChatCompletionServiceSettings(StreamInput in) throws IOException {
-        this.modelId = in.readString();
+        this.modelId = in.readOptionalString();
         this.uri = createUri(in.readString());
+        this.maxInputTokens = in.readOptionalVInt();
 
         if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_15_0)) {
             this.rateLimitSettings = new RateLimitSettings(in);
-            this.maxInputTokens = in.readOptionalVInt();
         } else {
             this.rateLimitSettings = DEFAULT_RATE_LIMIT_SETTINGS;
-            this.maxInputTokens = DEFAULT_TOKEN_LIMIT;
         }
     }
 
@@ -141,7 +139,7 @@ public URI uri() {
         return uri;
     }
 
-    public int maxInputTokens() {
+    public Integer maxInputTokens() {
         return maxInputTokens;
     }
 
@@ -161,10 +159,13 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
 
     @Override
     protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder builder, Params params) throws IOException {
-        builder.field(MODEL_ID, modelId);
-
+        if (modelId != null) {
+            builder.field(MODEL_ID, modelId);
+        }
         builder.field(URL, uri.toString());
-        builder.field(MAX_INPUT_TOKENS, maxInputTokens);
+        if (maxInputTokens != null) {
+            builder.field(MAX_INPUT_TOKENS, maxInputTokens);
+        }
         rateLimitSettings.toXContent(builder, params);
 
         return builder;
@@ -177,13 +178,13 @@ public String getWriteableName() {
 
     @Override
     public TransportVersion getMinimalSupportedVersion() {
-        return TransportVersions.V_8_12_0;
+        return TransportVersions.V_8_14_0;
     }
 
     @Override
     public void writeTo(StreamOutput out) throws IOException {
-        out.writeString(modelId);
-        out.writeOptionalString(uri != null ? uri.toString() : null);
+        out.writeOptionalString(modelId);
+        out.writeString(uri.toString());
         out.writeOptionalVInt(maxInputTokens);
 
         if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_15_0)) {
 
@@ -821,7 +821,7 @@ public void testGetConfiguration() throws Exception {
                 {
                        "service": "hugging_face",
                        "name": "Hugging Face",
-                       "task_types": ["text_embedding", "sparse_embedding"],
+                       "task_types": ["text_embedding", "sparse_embedding", "completion", "chat_completion"],
                        "configurations": {
                            "api_key": {
                                "description": "API Key for the provider you're connecting to.",
@@ -830,7 +830,7 @@ public void testGetConfiguration() throws Exception {
                                "sensitive": true,
                                "updatable": true,
                                "type": "str",
-                               "supported_task_types": ["text_embedding", "sparse_embedding"]
+                               "supported_task_types": ["text_embedding", "sparse_embedding", "completion", "chat_completion"]
                            },
                            "rate_limit.requests_per_minute": {
                                "description": "Minimize the number of rate limit errors.",
@@ -839,7 +839,7 @@ public void testGetConfiguration() throws Exception {
                                "sensitive": false,
                                "updatable": false,
                                "type": "int",
-                               "supported_task_types": ["text_embedding", "sparse_embedding"]
+                               "supported_task_types": ["text_embedding", "sparse_embedding", "completion", "chat_completion"]
                            },
                            "url": {
                                "default_value": "https://api.openai.com/v1/embeddings",
@@ -849,7 +849,7 @@ public void testGetConfiguration() throws Exception {
                                "sensitive": false,
                                "updatable": false,
                                "type": "str",
-                               "supported_task_types": ["text_embedding", "sparse_embedding"]
+                               "supported_task_types": ["text_embedding", "sparse_embedding", "completion", "chat_completion"]
                            }
                        }
                    }