From 1858e482d07368433c60b589f53abcbf8923d310 Mon Sep 17 00:00:00 2001 From: Robert Jaszczurek Date: Fri, 14 Feb 2025 15:47:50 +0100 Subject: [PATCH 1/2] Update start model deployment & update model deployment apis --- output/openapi/elasticsearch-openapi.json | 24 ++++- .../elasticsearch-serverless-openapi.json | 24 ++++- output/schema/schema.json | 87 ++++++++++++------- output/typescript/types.ts | 4 + .../ml.start_trained_model_deployment.json | 4 + specification/ml/_types/TrainedModel.ts | 12 +++ .../MlStartTrainedModelDeploymentRequest.ts | 10 +++ .../MlUpdateTrainedModelDeploymentRequest.ts | 8 ++ 8 files changed, 140 insertions(+), 33 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 6ec158e75c..757fe3bb02 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -23089,7 +23089,7 @@ { "in": "query", "name": "number_of_allocations", - "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.", + "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.\nIf adaptive_allocations is enabled, do not set this value, because it’s automatically set.", "deprecated": false, "schema": { "type": "number" @@ -23147,6 +23147,20 @@ "style": "form" } ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "adaptive_allocations": { + "$ref": "#/components/schemas/ml._types:AdaptiveAllocationsSettings" + } + } + } + } + } + }, "responses": { "200": { "description": "", @@ -24120,8 +24134,11 @@ "type": "object", "properties": { "number_of_allocations": { - "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.", + "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.\nIf adaptive_allocations is enabled, do not set this value, because it’s automatically set.", "type": "number" + }, + "adaptive_allocations": { + "$ref": "#/components/schemas/ml._types:AdaptiveAllocationsSettings" } } } @@ -80793,12 +80810,15 @@ "type": "object", "properties": { "enabled": { + "description": "If true, adaptive_allocations is enabled", "type": "boolean" }, "min_number_of_allocations": { + "description": "Specifies the minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", "type": "number" }, "max_number_of_allocations": { + "description": "Specifies the maximum number of allocations to scale to.\nIf set, it must be greater than or equal to min_number_of_allocations.", "type": "number" } }, diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 5761bc6349..241b380c05 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13095,7 +13095,7 @@ { "in": "query", "name": "number_of_allocations", - "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.", + "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.\nIf adaptive_allocations is enabled, do not set this value, because it’s automatically set.", "deprecated": false, "schema": { "type": "number" @@ -13153,6 +13153,20 @@ "style": "form" } ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "adaptive_allocations": { + "$ref": "#/components/schemas/ml._types:AdaptiveAllocationsSettings" + } + } + } + } + } + }, "responses": { "200": { "description": "", @@ -14047,8 +14061,11 @@ "type": "object", "properties": { "number_of_allocations": { - "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.", + "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.\nIf adaptive_allocations is enabled, do not set this value, because it’s automatically set.", "type": "number" + }, + "adaptive_allocations": { + "$ref": "#/components/schemas/ml._types:AdaptiveAllocationsSettings" } } } @@ -52034,12 +52051,15 @@ "type": "object", "properties": { "enabled": { + "description": "If true, adaptive_allocations is enabled", "type": "boolean" }, "min_number_of_allocations": { + "description": "Specifies the minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", "type": "number" }, "max_number_of_allocations": { + "description": "Specifies the maximum number of allocations to scale to.\nIf set, it must be greater than or equal to min_number_of_allocations.", "type": "number" } }, diff --git a/output/schema/schema.json b/output/schema/schema.json index ad9f981b8c..6755a21237 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -156171,6 +156171,7 @@ }, "properties": [ { + "description": "If true, adaptive_allocations is enabled", "name": "enabled", "required": true, "type": { @@ -156182,6 +156183,7 @@ } }, { + "description": "Specifies the minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", "name": "min_number_of_allocations", "required": false, "type": { @@ -156193,6 +156195,7 @@ } }, { + "description": "Specifies the maximum number of allocations to scale to.\nIf set, it must be greater than or equal to min_number_of_allocations.", "name": "max_number_of_allocations", "required": false, "type": { @@ -156204,7 +156207,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L109-L113" + "specLocation": "ml/_types/TrainedModel.ts#L109-L125" }, { "kind": "interface", @@ -161552,7 +161555,7 @@ "name": "DeploymentAllocationState", "namespace": "ml._types" }, - "specLocation": "ml/_types/TrainedModel.ts#L318-L331" + "specLocation": "ml/_types/TrainedModel.ts#L330-L343" }, { "kind": "enum", @@ -161578,7 +161581,7 @@ "name": "DeploymentAssignmentState", "namespace": "ml._types" }, - "specLocation": "ml/_types/TrainedModel.ts#L333-L350" + "specLocation": "ml/_types/TrainedModel.ts#L345-L362" }, { "kind": "interface", @@ -162623,7 +162626,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L261-L275" + "specLocation": "ml/_types/TrainedModel.ts#L273-L287" }, { "kind": "interface", @@ -164702,7 +164705,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L244-L259" + "specLocation": "ml/_types/TrainedModel.ts#L256-L271" }, { "kind": "interface", @@ -165995,7 +165998,7 @@ "name": "RoutingState", "namespace": "ml._types" }, - "specLocation": "ml/_types/TrainedModel.ts#L395-L416" + "specLocation": "ml/_types/TrainedModel.ts#L407-L428" }, { "kind": "enum", @@ -166690,7 +166693,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L277-L284" + "specLocation": "ml/_types/TrainedModel.ts#L289-L296" }, { "kind": "interface", @@ -166727,7 +166730,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L286-L291" + "specLocation": "ml/_types/TrainedModel.ts#L298-L303" }, { "kind": "interface", @@ -166773,7 +166776,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L293-L300" + "specLocation": "ml/_types/TrainedModel.ts#L305-L312" }, { "kind": "interface", @@ -166886,7 +166889,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L447-L464" + "specLocation": "ml/_types/TrainedModel.ts#L459-L476" }, { "kind": "interface", @@ -166944,7 +166947,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L418-L436" + "specLocation": "ml/_types/TrainedModel.ts#L430-L448" }, { "kind": "interface", @@ -167077,7 +167080,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L357-L393" + "specLocation": "ml/_types/TrainedModel.ts#L369-L405" }, { "kind": "interface", @@ -167336,7 +167339,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L191-L227" + "specLocation": "ml/_types/TrainedModel.ts#L203-L239" }, { "kind": "interface", @@ -167361,7 +167364,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L229-L232" + "specLocation": "ml/_types/TrainedModel.ts#L241-L244" }, { "kind": "interface", @@ -167438,7 +167441,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L234-L242" + "specLocation": "ml/_types/TrainedModel.ts#L246-L254" }, { "kind": "interface", @@ -167484,7 +167487,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L438-L445" + "specLocation": "ml/_types/TrainedModel.ts#L450-L457" }, { "kind": "interface", @@ -167753,7 +167756,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L144-L189" + "specLocation": "ml/_types/TrainedModel.ts#L156-L201" }, { "kind": "interface", @@ -168211,7 +168214,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L115-L135" + "specLocation": "ml/_types/TrainedModel.ts#L127-L147" }, { "kind": "interface", @@ -168232,7 +168235,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L466-L468" + "specLocation": "ml/_types/TrainedModel.ts#L478-L480" }, { "kind": "interface", @@ -168253,7 +168256,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L470-L472" + "specLocation": "ml/_types/TrainedModel.ts#L482-L484" }, { "kind": "interface", @@ -168287,7 +168290,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L474-L483" + "specLocation": "ml/_types/TrainedModel.ts#L486-L495" }, { "kind": "interface", @@ -168321,7 +168324,7 @@ } } ], - "specLocation": "ml/_types/TrainedModel.ts#L137-L142" + "specLocation": "ml/_types/TrainedModel.ts#L149-L154" }, { "kind": "interface", @@ -168434,7 +168437,7 @@ "name": "TrainedModelType", "namespace": "ml._types" }, - "specLocation": "ml/_types/TrainedModel.ts#L302-L316" + "specLocation": "ml/_types/TrainedModel.ts#L314-L328" }, { "kind": "enum", @@ -168450,7 +168453,7 @@ "name": "TrainingPriority", "namespace": "ml._types" }, - "specLocation": "ml/_types/TrainedModel.ts#L352-L355" + "specLocation": "ml/_types/TrainedModel.ts#L364-L367" }, { "kind": "interface", @@ -179152,7 +179155,21 @@ "CommonQueryParameters" ], "body": { - "kind": "no_body" + "kind": "properties", + "properties": [ + { + "description": "Adaptive allocations configuration. When enabled, the number of allocations\nis set based on the current load.\nIf adaptive_allocations is enabled, do not set the number of allocations manually.", + "name": "adaptive_allocations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AdaptiveAllocationsSettings", + "namespace": "ml._types" + } + } + } + ] }, "description": "Start a trained model deployment.\nIt allocates the model to every machine learning node.", "inherits": { @@ -179210,7 +179227,7 @@ } }, { - "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.", + "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.\nIf adaptive_allocations is enabled, do not set this value, because it’s automatically set.", "name": "number_of_allocations", "required": false, "serverDefault": 1, @@ -179287,7 +179304,7 @@ } } ], - "specLocation": "ml/start_trained_model_deployment/MlStartTrainedModelDeploymentRequest.ts#L29-L101" + "specLocation": "ml/start_trained_model_deployment/MlStartTrainedModelDeploymentRequest.ts#L30-L111" }, { "kind": "response", @@ -181126,7 +181143,7 @@ "kind": "properties", "properties": [ { - "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.", + "description": "The number of model allocations on each node where the model is deployed.\nAll allocations on a node share the same copy of the model in memory but use\na separate set of threads to evaluate the model.\nIncreasing this value generally increases the throughput.\nIf this setting is greater than the number of hardware threads\nit will automatically be changed to a value less than the number of hardware threads.\nIf adaptive_allocations is enabled, do not set this value, because it’s automatically set.", "name": "number_of_allocations", "required": false, "serverDefault": 1, @@ -181137,6 +181154,18 @@ "namespace": "_types" } } + }, + { + "description": "Adaptive allocations configuration. When enabled, the number of allocations\nis set based on the current load.\nIf adaptive_allocations is enabled, do not set the number of allocations manually.", + "name": "adaptive_allocations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AdaptiveAllocationsSettings", + "namespace": "ml._types" + } + } } ] }, @@ -181180,7 +181209,7 @@ } } ], - "specLocation": "ml/update_trained_model_deployment/MlUpdateTrainedModelDeploymentRequest.ts#L24-L70" + "specLocation": "ml/update_trained_model_deployment/MlUpdateTrainedModelDeploymentRequest.ts#L25-L78" }, { "kind": "response", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 5db4dbcfad..1d3ab49452 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -16471,6 +16471,9 @@ export interface MlStartTrainedModelDeploymentRequest extends RequestBase { threads_per_allocation?: integer timeout?: Duration wait_for?: MlDeploymentAllocationState + body?: { + adaptive_allocations?: MlAdaptiveAllocationsSettings + } } export interface MlStartTrainedModelDeploymentResponse { @@ -16660,6 +16663,7 @@ export interface MlUpdateTrainedModelDeploymentRequest extends RequestBase { number_of_allocations?: integer body?: { number_of_allocations?: integer + adaptive_allocations?: MlAdaptiveAllocationsSettings } } diff --git a/specification/_json_spec/ml.start_trained_model_deployment.json b/specification/_json_spec/ml.start_trained_model_deployment.json index 8588362cbe..d4fc693862 100644 --- a/specification/_json_spec/ml.start_trained_model_deployment.json +++ b/specification/_json_spec/ml.start_trained_model_deployment.json @@ -73,6 +73,10 @@ "options": ["starting", "started", "fully_allocated"], "default": "started" } + }, + "body": { + "description": "The settings for the trained model deployment", + "required": false } } } diff --git a/specification/ml/_types/TrainedModel.ts b/specification/ml/_types/TrainedModel.ts index d21c37e678..9374dd09b0 100644 --- a/specification/ml/_types/TrainedModel.ts +++ b/specification/ml/_types/TrainedModel.ts @@ -107,8 +107,20 @@ export class TrainedModelDeploymentStats { } export class AdaptiveAllocationsSettings { + /** + * If true, adaptive_allocations is enabled + */ enabled: boolean + /** + * Specifies the minimum number of allocations to scale to. + * If set, it must be greater than or equal to 0. + * If not defined, the deployment scales to 0. + */ min_number_of_allocations?: integer + /** + * Specifies the maximum number of allocations to scale to. + * If set, it must be greater than or equal to min_number_of_allocations. + */ max_number_of_allocations?: integer } diff --git a/specification/ml/start_trained_model_deployment/MlStartTrainedModelDeploymentRequest.ts b/specification/ml/start_trained_model_deployment/MlStartTrainedModelDeploymentRequest.ts index 73ee5a16de..0ecb580896 100644 --- a/specification/ml/start_trained_model_deployment/MlStartTrainedModelDeploymentRequest.ts +++ b/specification/ml/start_trained_model_deployment/MlStartTrainedModelDeploymentRequest.ts @@ -22,6 +22,7 @@ import { ByteSize, Id } from '@_types/common' import { integer } from '@_types/Numeric' import { Duration } from '@_types/Time' import { + AdaptiveAllocationsSettings, DeploymentAllocationState, TrainingPriority } from '../_types/TrainedModel' @@ -68,6 +69,7 @@ export interface Request extends RequestBase { * Increasing this value generally increases the throughput. * If this setting is greater than the number of hardware threads * it will automatically be changed to a value less than the number of hardware threads. + * If adaptive_allocations is enabled, do not set this value, because it’s automatically set. * @server_default 1 */ number_of_allocations?: integer @@ -98,4 +100,12 @@ export interface Request extends RequestBase { */ wait_for?: DeploymentAllocationState } + body: { + /** + * Adaptive allocations configuration. When enabled, the number of allocations + * is set based on the current load. + * If adaptive_allocations is enabled, do not set the number of allocations manually. + */ + adaptive_allocations?: AdaptiveAllocationsSettings + } } diff --git a/specification/ml/update_trained_model_deployment/MlUpdateTrainedModelDeploymentRequest.ts b/specification/ml/update_trained_model_deployment/MlUpdateTrainedModelDeploymentRequest.ts index e80e4e60b7..cc16fbef0f 100644 --- a/specification/ml/update_trained_model_deployment/MlUpdateTrainedModelDeploymentRequest.ts +++ b/specification/ml/update_trained_model_deployment/MlUpdateTrainedModelDeploymentRequest.ts @@ -20,6 +20,7 @@ import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' import { integer } from '@_types/Numeric' +import { AdaptiveAllocationsSettings } from '@ml/_types/TrainedModel' /** * Update a trained model deployment. @@ -63,8 +64,15 @@ export interface Request extends RequestBase { * Increasing this value generally increases the throughput. * If this setting is greater than the number of hardware threads * it will automatically be changed to a value less than the number of hardware threads. + * If adaptive_allocations is enabled, do not set this value, because it’s automatically set. * @server_default 1 */ number_of_allocations?: integer + /** + * Adaptive allocations configuration. When enabled, the number of allocations + * is set based on the current load. + * If adaptive_allocations is enabled, do not set the number of allocations manually. + */ + adaptive_allocations?: AdaptiveAllocationsSettings } } From 696f0b24e116796ddd5fd104832da226a7736be0 Mon Sep 17 00:00:00 2001 From: Robert Jaszczurek Date: Fri, 14 Feb 2025 15:58:20 +0100 Subject: [PATCH 2/2] fix formatting --- .../MlUpdateTrainedModelDeploymentRequest.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/ml/update_trained_model_deployment/MlUpdateTrainedModelDeploymentRequest.ts b/specification/ml/update_trained_model_deployment/MlUpdateTrainedModelDeploymentRequest.ts index cc16fbef0f..db137d1999 100644 --- a/specification/ml/update_trained_model_deployment/MlUpdateTrainedModelDeploymentRequest.ts +++ b/specification/ml/update_trained_model_deployment/MlUpdateTrainedModelDeploymentRequest.ts @@ -17,10 +17,10 @@ * under the License. */ +import { AdaptiveAllocationsSettings } from '@ml/_types/TrainedModel' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' import { integer } from '@_types/Numeric' -import { AdaptiveAllocationsSettings } from '@ml/_types/TrainedModel' /** * Update a trained model deployment.