 import org.elasticsearch.xpack.inference.chunking.ChunkingSettingsBuilder;
 import org.elasticsearch.xpack.inference.chunking.EmbeddingRequestChunker;
 import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
-import org.elasticsearch.xpack.inference.services.ServiceUtils;
+import org.elasticsearch.xpack.inference.services.validation.ModelValidatorBuilder;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -499,49 +499,38 @@ public Model parsePersistedConfig(String inferenceEntityId, TaskType taskType, M
 
     @Override
     public void checkModelConfig(Model model, ActionListener<Model> listener) {
-        if (model instanceof CustomElandEmbeddingModel elandModel && elandModel.getTaskType() == TaskType.TEXT_EMBEDDING) {
-            // At this point the inference endpoint configuration has not been persisted yet, if we attempt to do inference using the
-            // inference id we'll get an error because the trained model code needs to use the persisted inference endpoint to retrieve the
-            // model id. To get around this we'll have the getEmbeddingSize() method use the model id instead of inference id. So we need
-            // to create a temporary model that overrides the inference id with the model id.
-            var temporaryModelWithModelId = new CustomElandEmbeddingModel(
-                elandModel.getServiceSettings().modelId(),
-                elandModel.getTaskType(),
-                elandModel.getConfigurations().getService(),
-                elandModel.getServiceSettings(),
-                elandModel.getConfigurations().getChunkingSettings()
+        ModelValidatorBuilder.buildModelValidator(model.getTaskType(), true).validate(this, model, listener);
+    }
+
+    @Override
+    public Model updateModelWithEmbeddingDetails(Model model, int embeddingSize) {
+        if (model instanceof CustomElandEmbeddingModel embeddingsModel) {
+            var serviceSettings = embeddingsModel.getServiceSettings();
+
+            var updatedServiceSettings = new CustomElandInternalTextEmbeddingServiceSettings(
+                serviceSettings.getNumAllocations(),
+                serviceSettings.getNumThreads(),
+                serviceSettings.modelId(),
+                serviceSettings.getAdaptiveAllocationsSettings(),
+                embeddingSize,
+                serviceSettings.similarity(),
+                serviceSettings.elementType()
             );
 
-            ServiceUtils.getEmbeddingSize(
-                temporaryModelWithModelId,
-                this,
-                listener.delegateFailureAndWrap((l, size) -> l.onResponse(updateModelWithEmbeddingDetails(elandModel, size)))
+            return new CustomElandEmbeddingModel(
+                model.getInferenceEntityId(),
+                model.getTaskType(),
+                model.getConfigurations().getService(),
+                updatedServiceSettings,
+                model.getConfigurations().getChunkingSettings()
             );
         } else {
-            listener.onResponse(model);
+            // TODO: This is for the E5 case which is text embedding but we didn't previously update the dimensions. Figure out if we do
+            // need to update the dimensions?
+            return model;
         }
     }
 
-    private static CustomElandEmbeddingModel updateModelWithEmbeddingDetails(CustomElandEmbeddingModel model, int embeddingSize) {
-        CustomElandInternalTextEmbeddingServiceSettings serviceSettings = new CustomElandInternalTextEmbeddingServiceSettings(
-            model.getServiceSettings().getNumAllocations(),
-            model.getServiceSettings().getNumThreads(),
-            model.getServiceSettings().modelId(),
-            model.getServiceSettings().getAdaptiveAllocationsSettings(),
-            embeddingSize,
-            model.getServiceSettings().similarity(),
-            model.getServiceSettings().elementType()
-        );
-
-        return new CustomElandEmbeddingModel(
-            model.getInferenceEntityId(),
-            model.getTaskType(),
-            model.getConfigurations().getService(),
-            serviceSettings,
-            model.getConfigurations().getChunkingSettings()
-        );
-    }
-
     @Override
     public void infer(
         Model model,
@@ -904,7 +893,10 @@ private List<Model> defaultConfigs(boolean useLinuxOptimizedModel) {
 
     @Override
     boolean isDefaultId(String inferenceId) {
-        return DEFAULT_ELSER_ID.equals(inferenceId) || DEFAULT_E5_ID.equals(inferenceId);
+        // return DEFAULT_ELSER_ID.equals(inferenceId) || DEFAULT_E5_ID.equals(inferenceId);
+        // TODO: This is a temporary override to ensure that we always deploy models on infer to run a validation call.
+        // Figure out if this is what we actually want to do?
+        return true;
     }
 
     static EmbeddingRequestChunker.EmbeddingType embeddingTypeFromTaskTypeAndSettings(
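
Reviewer note: the core of this change is that `checkModelConfig` no longer computes the embedding size itself. It now builds a validator via `ModelValidatorBuilder.buildModelValidator(model.getTaskType(), true)` and lets that validator drive the check, calling back into the service through the new `updateModelWithEmbeddingDetails` override to fold the discovered dimensions into the service settings. The sketch below is a minimal, self-contained illustration of that delegation pattern under assumed validator internals (the diff only shows the call site); `InferenceService`, `ModelValidator`, `ModelValidators`, `inferEmbeddingSize`, and the simplified `Model` record are hypothetical stand-ins, not the real Elasticsearch API.

```java
import java.util.function.BiConsumer;

// Simplified stand-ins for the production types; everything in this file is
// hypothetical and only mirrors the shape of the real Elasticsearch classes.
record Model(String inferenceEntityId, String taskType, int embeddingSize) {}

interface InferenceService {
    // Assumed hook: run a test inference and report the discovered embedding size.
    void inferEmbeddingSize(Model model, BiConsumer<Model, Integer> onSize);

    // Mirrors the new override in the diff: rebuild service settings once the size is known.
    Model updateModelWithEmbeddingDetails(Model model, int embeddingSize);
}

interface ModelValidator {
    void validate(InferenceService service, Model model, BiConsumer<Model, Exception> listener);
}

final class ModelValidators {
    // Analogous to ModelValidatorBuilder.buildModelValidator(taskType, ...): pick a
    // validator per task type. The internals here are assumed, not taken from the diff.
    static ModelValidator forTaskType(String taskType) {
        if ("text_embedding".equals(taskType)) {
            // Embedding case: run a test call, then fold the discovered dimensions
            // back into the model via updateModelWithEmbeddingDetails.
            return (service, model, listener) -> service.inferEmbeddingSize(
                model,
                (m, size) -> listener.accept(service.updateModelWithEmbeddingDetails(m, size), null)
            );
        }
        // Other task types: a plain "does inference succeed" check would go here.
        return (service, model, listener) -> listener.accept(model, null);
    }
}
```

Centralizing the embedding-size handling in a task-type-aware validator is what lets `checkModelConfig` shrink to a one-liner here, at the cost of the open TODOs above about the E5 path and the temporary `isDefaultId` override.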