[ML] Inference API _services retrieves authorization information directly from EIS #134398
```diff
@@ -178,6 +178,7 @@ public static InferenceServiceConfiguration get() {
         );

         return new InferenceServiceConfiguration.Builder().setService(NAME)
+            .setName(NAME)
             .setTaskTypes(SUPPORTED_TASK_TYPES)
             .setConfigurations(configurationMap)
             .build();
```

Review comment on `.setName(NAME)`: These aren't really necessary, but I was running into a test failure because this field didn't exist until I switched the sorting field. Figured I'd leave the fix in though.
```diff
@@ -90,6 +90,7 @@
 import org.elasticsearch.xpack.inference.external.http.retry.RetrySettings;
 import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender;
 import org.elasticsearch.xpack.inference.external.http.sender.RequestExecutorServiceSettings;
+import org.elasticsearch.xpack.inference.external.http.sender.Sender;
 import org.elasticsearch.xpack.inference.highlight.SemanticTextHighlighter;
 import org.elasticsearch.xpack.inference.logging.ThrottlerManager;
 import org.elasticsearch.xpack.inference.mapper.OffsetSourceFieldMapper;
```

```diff
@@ -376,6 +377,8 @@ public Collection<?> createComponents(PluginServices services) {
         components.add(modelRegistry.get());
         components.add(httpClientManager);
         components.add(inferenceStatsBinding);
+        components.add(authorizationHandler);
+        components.add(new PluginComponentBinding<>(Sender.class, elasicInferenceServiceFactory.get().createSender()));

         // Only add InferenceServiceNodeLocalRateLimitCalculator (which is a ClusterStateListener) for cluster aware rate limiting,
         // if the rate limiting feature flags are enabled, otherwise provide noop implementation
```

Review comment on the `PluginComponentBinding` line: Without this I get a binding error when running
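For context on that binding, here is a minimal sketch of the wiring the change above assumes: the transport action shown further below takes `Sender` as an `@Inject` constructor dependency, so the plugin must register a concrete `Sender` instance with the injector. The class name here is hypothetical; only the `PluginComponentBinding` line and the injected types are taken from this PR.

```java
// Sketch only: illustrates why the PluginComponentBinding<>(Sender.class, ...) entry is needed.
// Any component with an @Inject constructor that asks for Sender.class can only be resolved
// if the plugin's createComponents() returns a binding for it.
import org.elasticsearch.injection.guice.Inject;
import org.elasticsearch.xpack.inference.external.http.sender.Sender;

public class ExampleEisAwareComponent { // hypothetical consumer, not part of the PR

    private final Sender eisSender;

    // Without the Sender binding registered in createComponents(), resolving this
    // parameter fails with a binding error at node startup.
    @Inject
    public ExampleEisAwareComponent(Sender sender) {
        this.eisSender = sender;
    }
}
```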
```diff
@@ -7,36 +7,55 @@

 package org.elasticsearch.xpack.inference.action;

 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.support.ActionFilters;
 import org.elasticsearch.action.support.HandledTransportAction;
 import org.elasticsearch.action.support.SubscribableListener;
 import org.elasticsearch.common.util.concurrent.EsExecutors;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.inference.InferenceService;
 import org.elasticsearch.inference.InferenceServiceConfiguration;
 import org.elasticsearch.inference.InferenceServiceRegistry;
 import org.elasticsearch.inference.TaskType;
 import org.elasticsearch.injection.guice.Inject;
 import org.elasticsearch.tasks.Task;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;
 import org.elasticsearch.xpack.core.inference.action.GetInferenceServicesAction;
 import org.elasticsearch.xpack.inference.external.http.sender.Sender;
 import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService;
 import org.elasticsearch.xpack.inference.services.elastic.authorization.ElasticInferenceServiceAuthorizationModel;
 import org.elasticsearch.xpack.inference.services.elastic.authorization.ElasticInferenceServiceAuthorizationRequestHandler;

 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;

 import static org.elasticsearch.xpack.inference.InferencePlugin.UTILITY_THREAD_POOL_NAME;

 public class TransportGetInferenceServicesAction extends HandledTransportAction<
     GetInferenceServicesAction.Request,
     GetInferenceServicesAction.Response> {

     private static final Logger logger = LogManager.getLogger(TransportGetInferenceServicesAction.class);

     private final InferenceServiceRegistry serviceRegistry;
+    private final ElasticInferenceServiceAuthorizationRequestHandler eisAuthorizationRequestHandler;
+    private final Sender eisSender;
+    private final ThreadPool threadPool;

     @Inject
     public TransportGetInferenceServicesAction(
         TransportService transportService,
         ActionFilters actionFilters,
-        InferenceServiceRegistry serviceRegistry
+        ThreadPool threadPool,
+        InferenceServiceRegistry serviceRegistry,
+        ElasticInferenceServiceAuthorizationRequestHandler eisAuthorizationRequestHandler,
+        Sender sender
     ) {
         super(
             GetInferenceServicesAction.NAME,
```

```diff
@@ -46,6 +65,9 @@ public TransportGetInferenceServicesAction(
             EsExecutors.DIRECT_EXECUTOR_SERVICE
         );
         this.serviceRegistry = serviceRegistry;
+        this.eisAuthorizationRequestHandler = eisAuthorizationRequestHandler;
+        this.eisSender = sender;
+        this.threadPool = threadPool;
     }

     @Override
```

```diff
@@ -69,27 +91,78 @@ private void getServiceConfigurationsForTaskType(
             .entrySet()
             .stream()
             .filter(
                 service -> service.getValue().hideFromConfigurationApi() == false
+                    // exclude EIS here because the hideFromConfigurationApi() is not supported
```
Suggested change on that comment line (marked outdated):

```diff
-                    // exclude EIS here because the hideFromConfigurationApi() is not supported
+                    // Exclude EIS as the EIS specific configurations are handled separately
```

Review comment: `getServiceConfigurationsForServices()` is a synchronous method and could be written with a return type instead of a listener. I think that would make this code easier to read, as you wouldn't need to define the merge listener:

```java
}).<List<InferenceServiceConfiguration>>andThen((configurationListener, authorizationModel) -> {
    var serviceConfigs = getServiceConfigurationsForServices(availableServices);

    if (authorizationModel.isAuthorized() == false) {
        delegate.onResponse(serviceConfigs);
        return;
    }

    if (requestedTaskType != null && authorizationModel.getAuthorizedTaskTypes().contains(requestedTaskType) == false) {
        delegate.onResponse(serviceConfigs);
        return;
    }

    var config = ElasticInferenceService.createConfiguration(authorizationModel.getAuthorizedTaskTypes());
    serviceConfigs.add(config);
    serviceConfigs.sort(Comparator.comparing(InferenceServiceConfiguration::getService));
    delegate.onResponse(serviceConfigs);
}
```

Reply: 🤦♂️ Thank you, for some reason I thought it needed to use a listener.
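To make the new flow easier to follow, here is a rough sketch of how the pieces above appear to fit together: fetch the authorization model from EIS off the transport thread (the `SubscribableListener` and `UTILITY_THREAD_POOL_NAME` imports point at this pattern), then merge the EIS configuration into the synchronously built list, along the lines of the reviewer's snippet. The `getAuthorization` call and the exact chaining are assumptions, not the merged implementation; `availableServices`, `requestedTaskType`, and `delegate` are taken from the review snippet above.

```java
// Sketch only: an approximation of the authorization-then-merge flow in this action.
SubscribableListener
    // 1. Ask EIS for the authorization model on the utility thread pool (assumed method signature).
    .<ElasticInferenceServiceAuthorizationModel>newForked(
        authListener -> threadPool.executor(UTILITY_THREAD_POOL_NAME)
            .execute(() -> eisAuthorizationRequestHandler.getAuthorization(authListener, eisSender))
    )
    // 2. Build the non-EIS configurations synchronously and append the EIS one only when authorized
    //    (and only when it covers the requested task type, if one was given).
    .<List<InferenceServiceConfiguration>>andThen((configurationListener, authorizationModel) -> {
        var serviceConfigs = getServiceConfigurationsForServices(availableServices);

        if (authorizationModel.isAuthorized()
            && (requestedTaskType == null || authorizationModel.getAuthorizedTaskTypes().contains(requestedTaskType))) {
            serviceConfigs.add(ElasticInferenceService.createConfiguration(authorizationModel.getAuthorizedTaskTypes()));
        }

        serviceConfigs.sort(Comparator.comparing(InferenceServiceConfiguration::getService));
        configurationListener.onResponse(serviceConfigs);
    })
    // 3. Hand the merged, sorted list back to the caller.
    .addListener(delegate);
```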
```diff
@@ -14,7 +14,6 @@
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.ValidationException;
-import org.elasticsearch.common.util.LazyInitializable;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
```
```diff
@@ -284,7 +283,7 @@ public void waitForFirstAuthorizationToComplete(TimeValue waitTime) {

     @Override
     public Set<TaskType> supportedStreamingTasks() {
-        return authorizationHandler.supportedStreamingTasks();
+        return EnumSet.of(TaskType.CHAT_COMPLETION);
     }

     @Override
```

Review comment on the new return: Previously, if the cluster wasn't authorized for chat completion, we'd return a vague error about not being able to stream. With this change we'll allow the request to be sent to EIS, and if it isn't authorized, EIS will return a failure.
```diff
@@ -462,7 +461,12 @@ public void parseRequestConfig(

     @Override
     public InferenceServiceConfiguration getConfiguration() {
-        return authorizationHandler.getConfiguration();
+        // This shouldn't be called because the configuration changes based on the authorization
+        // Instead, retrieve the authorization directly from the EIS gateway and use the static method
+        // ElasticInferenceService.Configuration#createConfiguration() to create a configuration based on the authorization response
+        throw new UnsupportedOperationException(
+            "The EIS configuration changes depending on authorization, requests should be made directly to EIS instead"
+        );
     }

     @Override
```
```diff
@@ -472,7 +476,11 @@ public EnumSet<TaskType> supportedTaskTypes() {

     @Override
     public boolean hideFromConfigurationApi() {
-        return authorizationHandler.hideFromConfigurationApi();
+        // This shouldn't be called because the configuration changes based on the authorization
+        // Instead, retrieve the authorization directly from the EIS gateway and use the response to determine if EIS is authorized
+        throw new UnsupportedOperationException(
+            "The EIS configuration changes depending on authorization, requests should be made directly to EIS instead"
+        );
     }

     private static ElasticInferenceServiceModel createModel(
```
```diff
@@ -656,62 +664,45 @@ private TraceContext getCurrentTraceInfo() {
         return new TraceContext(traceParent, traceState);
     }

-    public static class Configuration {
-
-        private final EnumSet<TaskType> enabledTaskTypes;
-        private final LazyInitializable<InferenceServiceConfiguration, RuntimeException> configuration;
-
-        public Configuration(EnumSet<TaskType> enabledTaskTypes) {
-            this.enabledTaskTypes = enabledTaskTypes;
-            configuration = initConfiguration();
-        }
-
-        private LazyInitializable<InferenceServiceConfiguration, RuntimeException> initConfiguration() {
-            return new LazyInitializable<>(() -> {
-                var configurationMap = new HashMap<String, SettingsConfiguration>();
-
-                configurationMap.put(
-                    MODEL_ID,
-                    new SettingsConfiguration.Builder(
-                        EnumSet.of(TaskType.SPARSE_EMBEDDING, TaskType.CHAT_COMPLETION, TaskType.RERANK, TaskType.TEXT_EMBEDDING)
-                    ).setDescription("The name of the model to use for the inference task.")
-                        .setLabel("Model ID")
-                        .setRequired(true)
-                        .setSensitive(false)
-                        .setUpdatable(false)
-                        .setType(SettingsConfigurationFieldType.STRING)
-                        .build()
-                );
-
-                configurationMap.put(
-                    MAX_INPUT_TOKENS,
-                    new SettingsConfiguration.Builder(EnumSet.of(TaskType.SPARSE_EMBEDDING, TaskType.TEXT_EMBEDDING)).setDescription(
-                        "Allows you to specify the maximum number of tokens per input."
-                    )
-                        .setLabel("Maximum Input Tokens")
-                        .setRequired(false)
-                        .setSensitive(false)
-                        .setUpdatable(false)
-                        .setType(SettingsConfigurationFieldType.INTEGER)
-                        .build()
-                );
-
-                configurationMap.putAll(
-                    RateLimitSettings.toSettingsConfiguration(
-                        EnumSet.of(TaskType.SPARSE_EMBEDDING, TaskType.CHAT_COMPLETION, TaskType.RERANK, TaskType.TEXT_EMBEDDING)
-                    )
-                );
-
-                return new InferenceServiceConfiguration.Builder().setService(NAME)
-                    .setName(SERVICE_NAME)
-                    .setTaskTypes(enabledTaskTypes)
-                    .setConfigurations(configurationMap)
-                    .build();
-            });
-        }
-
-        public InferenceServiceConfiguration get() {
-            return configuration.getOrCompute();
-        }
-    }
+    public static InferenceServiceConfiguration createConfiguration(EnumSet<TaskType> enabledTaskTypes) {
+        var configurationMap = new HashMap<String, SettingsConfiguration>();
+
+        configurationMap.put(
+            MODEL_ID,
+            new SettingsConfiguration.Builder(
+                EnumSet.of(TaskType.SPARSE_EMBEDDING, TaskType.CHAT_COMPLETION, TaskType.RERANK, TaskType.TEXT_EMBEDDING)
+            ).setDescription("The name of the model to use for the inference task.")
+                .setLabel("Model ID")
+                .setRequired(true)
+                .setSensitive(false)
+                .setUpdatable(false)
+                .setType(SettingsConfigurationFieldType.STRING)
+                .build()
+        );
+
+        configurationMap.put(
+            MAX_INPUT_TOKENS,
+            new SettingsConfiguration.Builder(EnumSet.of(TaskType.SPARSE_EMBEDDING, TaskType.TEXT_EMBEDDING)).setDescription(
+                "Allows you to specify the maximum number of tokens per input."
+            )
+                .setLabel("Maximum Input Tokens")
+                .setRequired(false)
+                .setSensitive(false)
+                .setUpdatable(false)
+                .setType(SettingsConfigurationFieldType.INTEGER)
+                .build()
+        );
+
+        configurationMap.putAll(
+            RateLimitSettings.toSettingsConfiguration(
+                EnumSet.of(TaskType.SPARSE_EMBEDDING, TaskType.CHAT_COMPLETION, TaskType.RERANK, TaskType.TEXT_EMBEDDING)
+            )
+        );
+
+        return new InferenceServiceConfiguration.Builder().setService(NAME)
+            .setName(SERVICE_NAME)
+            .setTaskTypes(enabledTaskTypes)
+            .setConfigurations(configurationMap)
+            .build();
+    }
```

Review comment on the task types passed to the `MODEL_ID` builder: Should this be the intersection of

Reply: The list of task types here tells the UI which task types this field should be configurable for. That should stay the same regardless of whether the user is authorized for a specific task type. There's a top-level field for task types that indicates which ones are authorized, and that's set here:
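To make that reply concrete, here is a small sketch of what the two task-type sets mean for a caller of `createConfiguration`. Only the `createConfiguration` call is taken from the diff; the wrapper class is illustrative.

```java
import java.util.EnumSet;

import org.elasticsearch.inference.InferenceServiceConfiguration;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService;

final class AuthorizedTaskTypesExample { // illustrative wrapper, not part of the PR

    static InferenceServiceConfiguration chatCompletionOnly() {
        // The top-level task types on the returned configuration come from the EIS authorization
        // response (here: chat completion only), while the per-field task types on MODEL_ID stay
        // the full static set so the UI knows which task types that field can be configured for.
        return ElasticInferenceService.createConfiguration(EnumSet.of(TaskType.CHAT_COMPLETION));
    }
}
```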
Review comment: The `init()` method below also calls `mockEISServer.enqueueAuthorizeAllModelsResponse()`. Is it equivalent to change the annotation on that method to `@Before`?

Reply: Yeah, the original issue for using `@BeforeClass` is that I ran into some weird issues locally. The original PR that added the `@BeforeClass` is here: #128640. For some background, when the node for the test starts up it will reach out to EIS and get the auth response. If that fails (there isn't a response queued in the mock server), then the tests will fail. What I was observing is that the base class's static logic would only be executed once regardless of how many subclasses used the base. This resulted in the first test class succeeding but the second test class that leveraged the base failing. To get around this I added the `@BeforeClass` and it seemed to fix the issue. The reason we need this in `@BeforeClass` is that we need a response queued before Elasticsearch is started. Elasticsearch is started only once at the beginning, before all the tests run.
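For illustration, a rough sketch of the test base class shape this thread describes. Class and type names other than `mockEISServer` and `enqueueAuthorizeAllModelsResponse()` are placeholders, and the base type is an assumption; the actual test classes in the PR may differ.

```java
import org.elasticsearch.test.rest.ESRestTestCase;
import org.junit.Before;
import org.junit.BeforeClass;

// Sketch of the pattern discussed above: queue an authorization response before the shared
// Elasticsearch test node starts, because the node fetches EIS authorization on startup and
// the whole class fails if nothing is queued in the mock server at that point.
public abstract class BaseMockEisAuthServerTest extends ESRestTestCase { // placeholder name

    // Placeholder stand-in for the mock EIS authorization server used by the real tests.
    static final class MockEisAuthorizationServer {
        void enqueueAuthorizeAllModelsResponse() {
            // In the real tests this queues a canned "authorized for all models" response.
        }
    }

    protected static final MockEisAuthorizationServer mockEISServer = new MockEisAuthorizationServer();

    @BeforeClass
    public static void queueAuthResponseBeforeClusterStarts() {
        // Runs once per concrete test class (unlike static initialization of the base class,
        // which runs only once per JVM), so each class has a response queued before its node starts.
        mockEISServer.enqueueAuthorizeAllModelsResponse();
    }

    @Before
    public void init() {
        // Per-test responses can still be queued here for requests made during the tests themselves.
        mockEISServer.enqueueAuthorizeAllModelsResponse();
    }
}
```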