WIP

davidkyle · davidkyle · commit 6600017806aa · 2024-10-14T18:43:41.000+01:00
diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java
@@ -243,6 +243,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion CHUNK_SENTENCE_OVERLAP_SETTING_ADDED = def(8_767_00_0);
     public static final TransportVersion OPT_IN_ESQL_CCS_EXECUTION_INFO = def(8_768_00_0);
     public static final TransportVersion QUERY_RULE_TEST_API = def(8_769_00_0);
+    public static final TransportVersion ML_INFERENCE_ATTACH_TO_EXISTSING_DEPLOYMENT = def(8_770_00_0);
 
     /*
      * STOP! READ THIS FIRST! No, really,
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportDeleteInferenceEndpointAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportDeleteInferenceEndpointAction.java
@@ -47,7 +47,6 @@ public class TransportDeleteInferenceEndpointAction extends TransportMasterNodeA
 
     private final ModelRegistry modelRegistry;
     private final InferenceServiceRegistry serviceRegistry;
-    private static final Logger logger = LogManager.getLogger(TransportDeleteInferenceEndpointAction.class);
     private final Executor executor;
 
     @Inject
@@ -118,7 +117,11 @@ private void doExecuteForked(
 
             var service = serviceRegistry.getService(unparsedModel.service());
             if (service.isPresent()) {
-                service.get().stop(request.getInferenceEndpointId(), listener);
+                if (service.get().isInClusterService()) {
+                    // check for other models using this deployment
+                } else {
+                    service.get().stop(request.getInferenceEndpointId(), listener);
+                }
             } else {
                 listener.onFailure(
                     new ElasticsearchStatusException(
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java
@@ -120,6 +120,7 @@ public void start(Model model, ActionListener<Boolean> finalListener) {
 
     @Override
     public void stop(String inferenceEntityId, ActionListener<Boolean> listener) {
+        // TODO check if other inference endpoints are using this deployment
         var request = new StopTrainedModelDeploymentAction.Request(inferenceEntityId);
         request.setForce(true);
         client.execute(
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalModel.java
@@ -91,4 +91,8 @@ public ElasticsearchInternalServiceSettings getServiceSettings() {
     public String toString() {
         return Strings.toString(this.getConfigurations());
     }
+
+    /**
+     * Returns the id that inference requests for this model should target.
+     *
+     * @return the deployment id held by the service settings when one is set,
+     *         otherwise this model's inference entity id
+     */
+    public String mlNodeDeploymentId() {
+        var configuredDeploymentId = internalServiceSettings.getDeploymentId();
+        if (configuredDeploymentId != null) {
+            return configuredDeploymentId;
+        }
+        return getInferenceEntityId();
+    }
 }
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java
@@ -33,14 +33,20 @@
 import org.elasticsearch.xpack.core.inference.results.RankedDocsResults;
 import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults;
 import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction;
+import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsStatsAction;
 import org.elasticsearch.xpack.core.ml.action.InferModelAction;
 import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AssignmentStats;
 import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults;
 import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults;
 import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.EmptyConfigUpdate;
+import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfig;
+import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfig;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfigUpdate;
+import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfig;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate;
+import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextSimilarityConfig;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextSimilarityConfigUpdate;
 import org.elasticsearch.xpack.inference.chunking.ChunkingSettingsBuilder;
 import org.elasticsearch.xpack.inference.chunking.EmbeddingRequestChunker;
@@ -52,6 +58,7 @@
 import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
 import java.util.function.Consumer;
 import java.util.function.Function;
@@ -134,7 +141,10 @@ public void parseRequestConfig(
             throwIfNotEmptyMap(config, name());
 
             String modelId = (String) serviceSettingsMap.get(ElasticsearchInternalServiceSettings.MODEL_ID);
-            if (modelId == null) {
+            String deploymentId = (String) serviceSettingsMap.get(ElasticsearchInternalServiceSettings.DEPLOYMENT_ID);
+            if (deploymentId != null) {
+                validateAgainstDeployment(modelId, deploymentId, taskType, ) // TODO create model
+            } else if (modelId == null) {
                 if (OLD_ELSER_SERVICE_NAME.equals(serviceName)) {
                     // TODO complete deprecation of null model ID
                     // throw new ValidationException().addValidationError("Error parsing request config, model id is missing");
@@ -215,6 +225,8 @@ private void customElandCase(
                         + "]. You may need to load it into the cluster using eland."
                 );
             } else {
+                throwIfUnsupportedTaskType(modelId, taskType, response.getResources().results().get(0).getInferenceConfig());
+
                 var model = createCustomElandModel(
                     inferenceEntityId,
                     taskType,
@@ -553,7 +565,7 @@ public void inferTextEmbedding(
         ActionListener<InferenceServiceResults> listener
     ) {
         var request = buildInferenceRequest(
-            model.getConfigurations().getInferenceEntityId(),
+            model.mlNodeDeploymentId(),
             TextEmbeddingConfigUpdate.EMPTY_INSTANCE,
             inputs,
             inputType,
@@ -579,7 +591,7 @@ public void inferSparseEmbedding(
         ActionListener<InferenceServiceResults> listener
     ) {
         var request = buildInferenceRequest(
-            model.getConfigurations().getInferenceEntityId(),
+            model.mlNodeDeploymentId(),
             TextExpansionConfigUpdate.EMPTY_UPDATE,
             inputs,
             inputType,
@@ -607,7 +619,7 @@ public void inferRerank(
         ActionListener<InferenceServiceResults> listener
     ) {
         var request = buildInferenceRequest(
-            model.getConfigurations().getInferenceEntityId(),
+            model.mlNodeDeploymentId(),
             new TextSimilarityConfigUpdate(query),
             inputs,
             inputType,
@@ -681,7 +693,7 @@ public void chunkedInfer(
 
             for (var batch : batchedRequests) {
                 var inferenceRequest = buildInferenceRequest(
-                    model.getConfigurations().getInferenceEntityId(),
+                    esModel.mlNodeDeploymentId(),
                     EmptyConfigUpdate.INSTANCE,
                     batch.batch().inputs(),
                     inputType,
@@ -858,4 +870,111 @@ static EmbeddingRequestChunker.EmbeddingType embeddingTypeFromTaskTypeAndSetting
             );
         };
     }
+
+
+    /**
+     * Validates a request that refers to an existing deployment and, on success,
+     * passes the listener a settings builder populated from that deployment.
+     * <p>
+     * The listener is failed when no deployment with {@code deploymentId} is found,
+     * when an explicitly requested {@code modelId} differs from the deployment's
+     * model, or when the task type check fails.
+     *
+     * @param modelId      model id from the request; may be {@code null}, in which case
+     *                     the deployment's own model id is used
+     * @param deploymentId id of the deployment to look up
+     * @param taskType     task type requested for the inference endpoint
+     * @param listener     receives the populated builder or the failure
+     */
+    private void validateAgainstDeployment(
+        String modelId,
+        String deploymentId,
+        TaskType taskType,
+        ActionListener<ElasticsearchInternalServiceSettings.Builder> listener
+    ) {
+        getDeployment(deploymentId, listener.delegateFailureAndWrap((l, response) -> {
+            if (response.isEmpty()) {
+                // The listener must always be completed, otherwise the caller never gets an answer.
+                l.onFailure(new ElasticsearchStatusException("Cannot find deployment [{}]", RestStatus.NOT_FOUND, deploymentId));
+                return;
+            }
+
+            var deployment = response.get();
+            var deployedModelId = deployment.getModelId();
+
+            // The request may omit the model id; only an explicit mismatch is an error.
+            if (modelId != null && modelId.equals(deployedModelId) == false) {
+                l.onFailure(
+                    new ElasticsearchStatusException(
+                        "Deployment [{}] uses model [{}] which does not match the model [{}] in the request.",
+                        RestStatus.BAD_REQUEST, // TODO better message
+                        deploymentId,
+                        deployedModelId,
+                        modelId
+                    )
+                );
+                return;
+            }
+
+            // Use the deployment's model id so the settings never carry a null model id.
+            var updatedSettings = new ElasticsearchInternalServiceSettings.Builder().setNumAllocations(
+                    deployment.getNumberOfAllocations()
+                )
+                .setNumThreads(deployment.getThreadsPerAllocation())
+                .setAdaptiveAllocationsSettings(deployment.getAdaptiveAllocationsSettings())
+                .setDeploymentId(deploymentId)
+                .setModelId(deployedModelId);
+
+            checkTaskTypeForMlNodeModel(
+                deployedModelId,
+                taskType,
+                l.delegateFailureAndWrap((l2, compatibleTaskType) -> l2.onResponse(updatedSettings))
+            );
+        }));
+    }
+
+    /**
+     * Requests trained model stats for {@code deploymentId} and passes the listener the
+     * stats of the first deployment whose id equals it, or an empty optional when no
+     * returned entry matches.
+     */
+    private void getDeployment(String deploymentId, ActionListener<Optional<AssignmentStats>> listener) {
+        var statsRequest = new GetTrainedModelsStatsAction.Request(deploymentId);
+        client.execute(
+            GetTrainedModelsStatsAction.INSTANCE,
+            statsRequest,
+            listener.delegateFailureAndWrap((delegate, statsResponse) -> {
+                var modelStats = statsResponse.getResources().results();
+                // Entries without deployment stats are dropped before the id comparison.
+                Optional<AssignmentStats> matchingDeployment = modelStats.stream()
+                    .map(GetTrainedModelsStatsAction.Response.TrainedModelStats::getDeploymentStats)
+                    .filter(stats -> stats != null && stats.getDeploymentId().equals(deploymentId))
+                    .findFirst();
+                delegate.onResponse(matchingDeployment);
+            })
+        );
+    }
+
+    /**
+     * Looks up the trained model {@code modelId} and checks that the inference config of
+     * the first returned model is compatible with {@code taskType}.
+     * <p>
+     * Responds with {@link Boolean#TRUE} when the check passes. An empty lookup result is
+     * reported to the listener as an {@link IllegalStateException}.
+     */
+    private void checkTaskTypeForMlNodeModel(String modelId, TaskType taskType, ActionListener<Boolean> listener) {
+        var getModelRequest = new GetTrainedModelsAction.Request(modelId);
+        client.execute(
+            GetTrainedModelsAction.INSTANCE,
+            getModelRequest,
+            listener.delegateFailureAndWrap((delegate, modelsResponse) -> {
+                var modelConfigs = modelsResponse.getResources().results();
+                if (modelConfigs.isEmpty() == false) {
+                    // Throws when the model's config does not map to the requested task type.
+                    throwIfUnsupportedTaskType(modelId, taskType, modelConfigs.get(0).getInferenceConfig());
+                    delegate.onResponse(Boolean.TRUE);
+                } else {
+                    delegate.onFailure(new IllegalStateException("this shouldn't happen"));
+                }
+            })
+        );
+    }
+
+    /**
+     * Verifies that the task type derived from {@code inferenceConfig} is the requested one.
+     *
+     * @param modelId         model id, used only in the error messages
+     * @param taskType        task type requested for the inference endpoint
+     * @param inferenceConfig inference config of the model being checked
+     * @throws ElasticsearchStatusException with {@code BAD_REQUEST} when the config maps to no
+     *         supported task type, or maps to a task type other than {@code taskType}
+     */
+    static void throwIfUnsupportedTaskType(String modelId, TaskType taskType, InferenceConfig inferenceConfig) {
+        var mappedTaskType = inferenceConfigToTaskType(inferenceConfig);
+        if (mappedTaskType != null && mappedTaskType == taskType) {
+            return;
+        }
+
+        // The "no mapping" case is reported before the mismatch case.
+        if (mappedTaskType == null) {
+            throw new ElasticsearchStatusException(
+                "Deployed model [{}] has type [{}] which does not map to any supported task types",
+                RestStatus.BAD_REQUEST,
+                modelId,
+                inferenceConfig.getWriteableName()
+            );
+        }
+
+        throw new ElasticsearchStatusException(
+            "Deployed model [{}] with type [{}] does not match the requested task type [{}]",
+            RestStatus.BAD_REQUEST,
+            modelId,
+            inferenceConfig.getWriteableName(),
+            taskType
+        );
+    }
+
+    /**
+     * Maps an inference config to the task type it serves.
+     *
+     * @return {@code SPARSE_EMBEDDING} for a text expansion config, {@code TEXT_EMBEDDING} for a
+     *         text embedding config, {@code RERANK} for a text similarity config, and
+     *         {@code null} for anything else
+     */
+    static TaskType inferenceConfigToTaskType(InferenceConfig config) {
+        if (config instanceof TextExpansionConfig) {
+            return TaskType.SPARSE_EMBEDDING;
+        }
+        if (config instanceof TextEmbeddingConfig) {
+            return TaskType.TEXT_EMBEDDING;
+        }
+        if (config instanceof TextSimilarityConfig) {
+            return TaskType.RERANK;
+        }
+        // No supported task type corresponds to this config.
+        return null;
+    }
 }
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettings.java
@@ -36,12 +36,14 @@ public class ElasticsearchInternalServiceSettings implements ServiceSettings {
     public static final String NUM_ALLOCATIONS = "num_allocations";
     public static final String NUM_THREADS = "num_threads";
     public static final String MODEL_ID = "model_id";
+    public static final String DEPLOYMENT_ID = "deployment_id";
     public static final String ADAPTIVE_ALLOCATIONS = "adaptive_allocations";
 
     private final Integer numAllocations;
     private final int numThreads;
     private final String modelId;
     private final AdaptiveAllocationsSettings adaptiveAllocationsSettings;
+    private final String deploymentId;
 
     public static ElasticsearchInternalServiceSettings fromPersistedMap(Map<String, Object> map) {
         return fromRequestMap(map).build();
@@ -95,12 +97,15 @@ protected static ElasticsearchInternalServiceSettings.Builder fromMap(
             );
         }
 
+        String deploymentId = extractOptionalString(map, DEPLOYMENT_ID, ModelConfigurations.SERVICE_SETTINGS, validationException);
+
         // if an error occurred while parsing, we'll set these to an invalid value, so we don't accidentally get a
         // null pointer when doing unboxing
         return new ElasticsearchInternalServiceSettings.Builder().setNumAllocations(numAllocations)
             .setNumThreads(Objects.requireNonNullElse(numThreads, FAILED_INT_PARSE_VALUE))
             .setModelId(modelId)
-            .setAdaptiveAllocationsSettings(adaptiveAllocationsSettings);
+            .setAdaptiveAllocationsSettings(adaptiveAllocationsSettings)
+            .setDeploymentId(deploymentId);
     }
 
     public ElasticsearchInternalServiceSettings(
@@ -113,13 +118,29 @@ public ElasticsearchInternalServiceSettings(
         this.numThreads = numThreads;
         this.modelId = Objects.requireNonNull(modelId);
         this.adaptiveAllocationsSettings = adaptiveAllocationsSettings;
+        this.deploymentId = null;
+    }
+
+    /**
+     * Creates settings that can additionally carry a deployment id.
+     *
+     * @param numAllocations              number of allocations
+     * @param numThreads                  number of threads
+     * @param modelId                     id of the model; must not be {@code null}
+     * @param adaptiveAllocationsSettings adaptive allocations settings
+     * @param deploymentId                id of the deployment, stored as given (may be {@code null})
+     * @throws NullPointerException if {@code modelId} is {@code null}
+     */
+    public ElasticsearchInternalServiceSettings(
+        Integer numAllocations,
+        int numThreads,
+        String modelId,
+        AdaptiveAllocationsSettings adaptiveAllocationsSettings,
+        String deploymentId
+    ) {
+        this.modelId = Objects.requireNonNull(modelId);
+        this.deploymentId = deploymentId;
+        this.numAllocations = numAllocations;
+        this.numThreads = numThreads;
+        this.adaptiveAllocationsSettings = adaptiveAllocationsSettings;
     }
 
     protected ElasticsearchInternalServiceSettings(ElasticsearchInternalServiceSettings other) {
         this.numAllocations = other.numAllocations;
         this.numThreads = other.numThreads;
         this.modelId = other.modelId;
         this.adaptiveAllocationsSettings = other.adaptiveAllocationsSettings;
+        this.deploymentId = other.deploymentId;
     }
 
     /**
@@ -145,6 +166,9 @@ public ElasticsearchInternalServiceSettings(StreamInput in) throws IOException {
         this.adaptiveAllocationsSettings = in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)
             ? in.readOptionalWriteable(AdaptiveAllocationsSettings::new)
             : null;
+        this.deploymentId = in.getTransportVersion().onOrAfter(TransportVersions.ML_INFERENCE_ATTACH_TO_EXISTSING_DEPLOYMENT)
+            ? in.readOptionalString()
+            : null;
     }
 
     @Override
@@ -159,6 +183,9 @@ public void writeTo(StreamOutput out) throws IOException {
         if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) {
             out.writeOptionalWriteable(getAdaptiveAllocationsSettings());
         }
+        if (out.getTransportVersion().onOrAfter(TransportVersions.ML_INFERENCE_ATTACH_TO_EXISTSING_DEPLOYMENT)) {
+            out.writeOptionalString(deploymentId);
+        }
     }
 
     @Override
@@ -178,6 +205,10 @@ public AdaptiveAllocationsSettings getAdaptiveAllocationsSettings() {
         return adaptiveAllocationsSettings;
     }
 
+    /**
+     * @return the deployment id held by these settings, or {@code null} when none was set
+     */
+    public String getDeploymentId() {
+        return deploymentId;
+    }
+
     @Override
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject();
@@ -195,6 +226,9 @@ protected void addInternalSettingsToXContent(XContentBuilder builder, Params par
         if (adaptiveAllocationsSettings != null) {
             builder.field(ADAPTIVE_ALLOCATIONS, adaptiveAllocationsSettings);
         }
+        if (deploymentId != null) {
+            builder.field(DEPLOYMENT_ID, deploymentId);
+        }
     }
 
     @Override
@@ -217,9 +251,10 @@ public static class Builder {
         private int numThreads;
         private String modelId;
         private AdaptiveAllocationsSettings adaptiveAllocationsSettings;
+        private String deploymentId;
 
         public ElasticsearchInternalServiceSettings build() {
-            return new ElasticsearchInternalServiceSettings(numAllocations, numThreads, modelId, adaptiveAllocationsSettings);
+            return new ElasticsearchInternalServiceSettings(numAllocations, numThreads, modelId, adaptiveAllocationsSettings, deploymentId);
         }
 
         public Builder setNumAllocations(Integer numAllocations) {
@@ -237,6 +272,11 @@ public Builder setModelId(String modelId) {
             return this;
         }
 
+        /**
+         * Sets the deployment id that the built settings will carry.
+         *
+         * @return this builder, for chaining
+         */
+        public Builder setDeploymentId(String deploymentId) {
+            this.deploymentId = deploymentId;
+            return this;
+        }
+
         public Builder setAdaptiveAllocationsSettings(AdaptiveAllocationsSettings adaptiveAllocationsSettings) {
             this.adaptiveAllocationsSettings = adaptiveAllocationsSettings;
             return this;
@@ -266,11 +306,12 @@ public boolean equals(Object o) {
         return Objects.equals(numAllocations, that.numAllocations)
             && numThreads == that.numThreads
             && Objects.equals(modelId, that.modelId)
-            && Objects.equals(adaptiveAllocationsSettings, that.adaptiveAllocationsSettings);
+            && Objects.equals(adaptiveAllocationsSettings, that.adaptiveAllocationsSettings)
+            && Objects.equals(deploymentId, that.deploymentId);
     }
 
     @Override
     public int hashCode() {
-        return Objects.hash(numAllocations, numThreads, modelId, adaptiveAllocationsSettings);
+        return Objects.hash(numAllocations, numThreads, modelId, adaptiveAllocationsSettings, deploymentId);
     }
 }
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettingsTests.java

Original file line number	Diff line number	Diff line change
`@@ -91,4 +91,8 @@ public ElasticsearchInternalServiceSettings getServiceSettings() {`
`91`	`91`	`public String toString() {`
`92`	`92`	`return Strings.toString(this.getConfigurations());`
`93`	`93`	`}`
	`94`	`+`
	`95`	`+ public String mlNodeDeploymentId() {`
	`96`	`+ return internalServiceSettings.getDeploymentId() == null ? getInferenceEntityId() : internalServiceSettings.getDeploymentId();`
	`97`	`+ }`
`94`	`98`	`}`