Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ public class BaseMockEISAuthServerTest extends ESRestTestCase {
.setting("xpack.security.enabled", "true")
// Adding both settings unless one feature flag is disabled in a particular environment
.setting("xpack.inference.elastic.url", mockEISServer::getUrl)
// TODO remove this once we've removed DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG and EIS_GATEWAY_URL
.setting("xpack.inference.eis.gateway.url", mockEISServer::getUrl)
// This plugin is located in the inference/qa/test-service-plugin package, look for TestInferenceServicePlugin
.plugin("inference-service-test")
.user("x_pack_rest_user", "x-pack-test-password")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
package org.elasticsearch.xpack.inference;

import org.elasticsearch.inference.TaskType;
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceFeature;

import java.io.IOException;

Expand All @@ -24,18 +23,11 @@ public void testGetDefaultEndpoints() throws IOException {
var allModels = getAllModels();
var chatCompletionModels = getModels("_all", TaskType.CHAT_COMPLETION);

if ((ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled()
|| ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled())) {
assertThat(allModels, hasSize(4));
assertThat(chatCompletionModels, hasSize(1));

for (var model : chatCompletionModels) {
assertEquals("chat_completion", model.get("task_type"));
}
} else {
assertThat(allModels, hasSize(3));
assertThat(chatCompletionModels, hasSize(0));
}
assertThat(allModels, hasSize(4));
assertThat(chatCompletionModels, hasSize(1));

for (var model : chatCompletionModels) {
assertEquals("chat_completion", model.get("task_type"));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,9 @@
import org.elasticsearch.client.Request;
import org.elasticsearch.common.Strings;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceFeature;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

Expand All @@ -28,27 +26,23 @@ public class InferenceGetServicesIT extends BaseMockEISAuthServerTest {
@SuppressWarnings("unchecked")
public void testGetServicesWithoutTaskType() throws IOException {
List<Object> services = getAllServices();
if ((ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled()
|| ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled())) {
assertThat(services.size(), equalTo(19));
} else {
assertThat(services.size(), equalTo(18));
}
assertThat(services.size(), equalTo(19));

String[] providers = new String[services.size()];
for (int i = 0; i < services.size(); i++) {
Map<String, Object> serviceConfig = (Map<String, Object>) services.get(i);
providers[i] = (String) serviceConfig.get("service");
}

var providerList = new ArrayList<>(
Arrays.asList(
assertArrayEquals(
List.of(
"alibabacloud-ai-search",
"amazonbedrock",
"anthropic",
"azureaistudio",
"azureopenai",
"cohere",
"elastic",
"elasticsearch",
"googleaistudio",
"googlevertexai",
Expand All @@ -61,13 +55,9 @@ public void testGetServicesWithoutTaskType() throws IOException {
"test_service",
"text_embedding_test_service",
"watsonxai"
)
).toArray(),
providers
);
if ((ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled()
|| ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled())) {
providerList.add(6, "elastic");
}
assertArrayEquals(providerList.toArray(), providers);
}

@SuppressWarnings("unchecked")
Expand Down Expand Up @@ -150,52 +140,33 @@ public void testGetServicesWithCompletionTaskType() throws IOException {
/**
 * Verifies the services reported for the {@code chat_completion} task type.
 *
 * <p>Exactly three services are expected, in this order: {@code elastic}, {@code openai},
 * {@code streaming_completion_test_service}. The expectation is asserted once and unconditionally,
 * with the expected array passed first and the actual array second.
 *
 * @throws IOException propagated from {@code getServices(...)}
 */
@SuppressWarnings("unchecked")
public void testGetServicesWithChatCompletionTaskType() throws IOException {
    List<Object> services = getServices(TaskType.CHAT_COMPLETION);
    assertThat(services.size(), equalTo(3));

    // Pull the "service" name out of every returned service configuration, preserving response order.
    String[] providers = new String[services.size()];
    for (int i = 0; i < services.size(); i++) {
        Map<String, Object> serviceConfig = (Map<String, Object>) services.get(i);
        providers[i] = (String) serviceConfig.get("service");
    }

    assertArrayEquals(List.of("elastic", "openai", "streaming_completion_test_service").toArray(), providers);
}

/**
 * Verifies the services reported for the {@code sparse_embedding} task type.
 *
 * <p>Exactly five services are expected, in this order: {@code alibabacloud-ai-search}, {@code elastic},
 * {@code elasticsearch}, {@code hugging_face}, {@code test_service}. The expectation is asserted once and
 * unconditionally, with the expected array passed first and the actual array second.
 *
 * @throws IOException propagated from {@code getServices(...)}
 */
@SuppressWarnings("unchecked")
public void testGetServicesWithSparseEmbeddingTaskType() throws IOException {
    List<Object> services = getServices(TaskType.SPARSE_EMBEDDING);
    assertThat(services.size(), equalTo(5));

    // Pull the "service" name out of every returned service configuration, preserving response order.
    String[] providers = new String[services.size()];
    for (int i = 0; i < services.size(); i++) {
        Map<String, Object> serviceConfig = (Map<String, Object>) services.get(i);
        providers[i] = (String) serviceConfig.get("service");
    }

    assertArrayEquals(
        List.of("alibabacloud-ai-search", "elastic", "elasticsearch", "hugging_face", "test_service").toArray(),
        providers
    );
}

private List<Object> getAllServices() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,6 @@
import java.util.function.Supplier;

import static java.util.Collections.singletonList;
import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceService.ELASTIC_INFERENCE_SERVICE_IDENTIFIER;
import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceFeature.DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG;
import static org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceFeature.ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG;

public class InferencePlugin extends Plugin
implements
Expand Down Expand Up @@ -252,46 +249,44 @@ public Collection<?> createComponents(PluginServices services) {
var inferenceServices = new ArrayList<>(inferenceServiceExtensions);
inferenceServices.add(this::getInferenceServiceFactories);

if (isElasticInferenceServiceEnabled()) {
// Create a separate instance of HTTPClientManager with its own SSL configuration (`xpack.inference.elastic.http.ssl.*`).
var elasticInferenceServiceHttpClientManager = HttpClientManager.create(
settings,
services.threadPool(),
services.clusterService(),
throttlerManager,
getSslService()
);

var elasticInferenceServiceRequestSenderFactory = new HttpRequestSender.Factory(
serviceComponents.get(),
elasticInferenceServiceHttpClientManager,
services.clusterService()
);
elasicInferenceServiceFactory.set(elasticInferenceServiceRequestSenderFactory);

ElasticInferenceServiceSettings inferenceServiceSettings = new ElasticInferenceServiceSettings(settings);
String elasticInferenceUrl = this.getElasticInferenceServiceUrl(inferenceServiceSettings);

var elasticInferenceServiceComponentsInstance = new ElasticInferenceServiceComponents(elasticInferenceUrl);
elasticInferenceServiceComponents.set(elasticInferenceServiceComponentsInstance);

var authorizationHandler = new ElasticInferenceServiceAuthorizationHandler(
elasticInferenceServiceComponentsInstance.elasticInferenceServiceUrl(),
services.threadPool()
);

inferenceServices.add(
() -> List.of(
context -> new ElasticInferenceService(
elasicInferenceServiceFactory.get(),
serviceComponents.get(),
elasticInferenceServiceComponentsInstance,
modelRegistry,
authorizationHandler
)
// Create a separate instance of HTTPClientManager with its own SSL configuration (`xpack.inference.elastic.http.ssl.*`).
var elasticInferenceServiceHttpClientManager = HttpClientManager.create(
settings,
services.threadPool(),
services.clusterService(),
throttlerManager,
getSslService()
);

var elasticInferenceServiceRequestSenderFactory = new HttpRequestSender.Factory(
serviceComponents.get(),
elasticInferenceServiceHttpClientManager,
services.clusterService()
);
elasicInferenceServiceFactory.set(elasticInferenceServiceRequestSenderFactory);

ElasticInferenceServiceSettings inferenceServiceSettings = new ElasticInferenceServiceSettings(settings);
String elasticInferenceUrl = inferenceServiceSettings.getElasticInferenceServiceUrl();

var elasticInferenceServiceComponentsInstance = new ElasticInferenceServiceComponents(elasticInferenceUrl);
elasticInferenceServiceComponents.set(elasticInferenceServiceComponentsInstance);

var authorizationHandler = new ElasticInferenceServiceAuthorizationHandler(
elasticInferenceServiceComponentsInstance.elasticInferenceServiceUrl(),
services.threadPool()
);

inferenceServices.add(
() -> List.of(
context -> new ElasticInferenceService(
elasicInferenceServiceFactory.get(),
serviceComponents.get(),
elasticInferenceServiceComponentsInstance,
modelRegistry,
authorizationHandler
)
);
}
)
);

var factoryContext = new InferenceServiceExtension.InferenceServiceFactoryContext(
services.client(),
Expand Down Expand Up @@ -420,11 +415,7 @@ public List<Setting<?>> getSettings() {
settings.addAll(Truncator.getSettingsDefinitions());
settings.addAll(RequestExecutorServiceSettings.getSettingsDefinitions());
settings.add(SKIP_VALIDATE_AND_START);

// Register Elastic Inference Service settings definitions if the corresponding feature flag is enabled.
if (isElasticInferenceServiceEnabled()) {
settings.addAll(ElasticInferenceServiceSettings.getSettingsDefinitions());
}
settings.addAll(ElasticInferenceServiceSettings.getSettingsDefinitions());

return settings;
}
Expand Down Expand Up @@ -499,25 +490,7 @@ public Map<String, Highlighter> getHighlighters() {
// Get Elastic Inference service URL based on feature flags to support transitioning
// to the new Elastic Inference Service URL.
private String getElasticInferenceServiceUrl(ElasticInferenceServiceSettings settings) {
String elasticInferenceUrl = null;

if (ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled()) {
elasticInferenceUrl = settings.getElasticInferenceServiceUrl();
} else if (DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled()) {
log.warn(
"Deprecated flag {} detected for enabling {}. Please use {}.",
ELASTIC_INFERENCE_SERVICE_IDENTIFIER,
DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG,
ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG
);
elasticInferenceUrl = settings.getEisGatewayUrl();
}

return elasticInferenceUrl;
}

protected Boolean isElasticInferenceServiceEnabled() {
return (ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled() || DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG.isEnabled());
return settings.getElasticInferenceServiceUrl();
}

protected SSLService getSslService() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,12 @@
import org.elasticsearch.common.util.FeatureFlag;

/**
* Elastic Inference Service (EIS) feature flag. When the feature is complete, this flag will be removed.
* Enable feature via JVM option: `-Des.elastic_inference_service_feature_flag_enabled=true`.
* Elastic Inference Service feature flag. Not being used anymore, but we'll keep it until the controller is no longer
* passing -Des.elastic_inference_service_feature_flag_enabled=true at startup.
*/
// Holder for the Elastic Inference Service feature flag constants; it declares no other members.
public class ElasticInferenceServiceFeature {

// TODO when we remove this also look in InferenceGetServicesIT and remove references to the deprecated URL setting
// Older flag, registered under the name "eis" and marked deprecated.
@Deprecated
public static final FeatureFlag DEPRECATED_ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG = new FeatureFlag("eis");

// Current flag, registered under the name "elastic_inference_service".
public static final FeatureFlag ELASTIC_INFERENCE_SERVICE_FEATURE_FLAG = new FeatureFlag("elastic_inference_service");

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,16 @@
*/
public class ElasticInferenceServiceSettings {

// TODO when we remove this look at InferenceGetServicesIT and remove the setting there as well
@Deprecated
static final Setting<String> EIS_GATEWAY_URL = Setting.simpleString("xpack.inference.eis.gateway.url", Setting.Property.NodeScope);

public static final String ELASTIC_INFERENCE_SERVICE_SSL_CONFIGURATION_PREFIX = "xpack.inference.elastic.http.ssl.";

static final Setting<String> ELASTIC_INFERENCE_SERVICE_URL = Setting.simpleString(
"xpack.inference.elastic.url",
Setting.Property.NodeScope
);

// Adjust this variable to be volatile, if the setting can be updated at some point in time
@Deprecated
private final String eisGatewayUrl;

private final String elasticInferenceServiceUrl;

/**
 * Captures the Elastic Inference Service URL values from the given settings.
 * Both values are read once here and stored in final fields.
 *
 * @param settings the settings to read {@code xpack.inference.eis.gateway.url} and
 *                 {@code xpack.inference.elastic.url} from
 */
public ElasticInferenceServiceSettings(Settings settings) {
eisGatewayUrl = EIS_GATEWAY_URL.get(settings);
elasticInferenceServiceUrl = ELASTIC_INFERENCE_SERVICE_URL.get(settings);
}

Expand All @@ -55,19 +46,13 @@ public ElasticInferenceServiceSettings(Settings settings) {

/**
 * Collects every setting definition declared for the Elastic Inference Service: the two URL settings,
 * the SSL enabled setting, and the enabled SSL configuration settings, in that order.
 *
 * @return a new mutable list of setting definitions
 */
public static List<Setting<?>> getSettingsDefinitions() {
    List<Setting<?>> definitions = new ArrayList<>(
        List.of(EIS_GATEWAY_URL, ELASTIC_INFERENCE_SERVICE_URL, ELASTIC_INFERENCE_SERVICE_SSL_ENABLED)
    );
    definitions.addAll(ELASTIC_INFERENCE_SERVICE_SSL_CONFIGURATION_SETTINGS.getEnabledSettings());
    return definitions;
}

/**
 * Returns the value of {@code xpack.inference.eis.gateway.url} that was read when this object was constructed.
 *
 * @return the gateway URL captured at construction time
 */
@Deprecated
public String getEisGatewayUrl() {
return eisGatewayUrl;
}

/**
 * Returns the value of {@code xpack.inference.elastic.url} that was read when this object was constructed.
 *
 * @return the Elastic Inference Service URL captured at construction time
 */
public String getElasticInferenceServiceUrl() {
return elasticInferenceServiceUrl;
}
Expand Down
Loading