From b34f9433d882fd11e9965ac0950ffdeb3551a31c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 18 Feb 2025 14:14:56 +0100 Subject: [PATCH] [Inference API] Adds chunking_settings to PUT inference API (#3781) * [Inference API] Adds chunking_settings to PUT inference API. * [Inference API] Make contrib. (cherry picked from commit bf2a5ca266971973db9525bd581cb7f3bf27d8ef) --- output/openapi/elasticsearch-openapi.json | 31 +++++++ .../elasticsearch-serverless-openapi.json | 31 +++++++ output/schema/schema-serverless.json | 89 ++++++++++++++++++- output/schema/schema.json | 89 ++++++++++++++++++- output/typescript/types.ts | 8 ++ specification/inference/_types/Services.ts | 37 ++++++++ 6 files changed, 277 insertions(+), 8 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 993fec31e7..fac648e6ab 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -72993,6 +72993,9 @@ "inference._types:InferenceEndpoint": { "type": "object", "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, "service": { "description": "The service type", "type": "string" @@ -73009,6 +73012,34 @@ "service_settings" ] }, + "inference._types:InferenceChunkingSettings": { + "allOf": [ + { + "$ref": "#/components/schemas/inference._types:InferenceEndpoint" + }, + { + "type": "object", + "properties": { + "max_chunk_size": { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "type": "number" + }, + "overlap": { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "type": "number" + }, + "sentence_overlap": { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "type": "number" + }, + "strategy": { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "type": "string" + } + } + } + ] + }, "inference._types:ServiceSettings": { "type": "object" }, diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 280bdf137e..1b4eec2f6b 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -46221,6 +46221,9 @@ "inference._types:InferenceEndpoint": { "type": "object", "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, "service": { "description": "The service type", "type": "string" @@ -46237,6 +46240,34 @@ "service_settings" ] }, + "inference._types:InferenceChunkingSettings": { + "allOf": [ + { + "$ref": "#/components/schemas/inference._types:InferenceEndpoint" + }, + { + "type": "object", + "properties": { + "max_chunk_size": { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "type": "number" + }, + "overlap": { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "type": "number" + }, + "sentence_overlap": { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "type": "number" + }, + "strategy": { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "type": "string" + } + } + } + ] + }, "inference._types:ServiceSettings": { "type": "object" }, diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 382bcf29a1..2898b6b5f9 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -96175,7 +96175,7 @@ "name": "ServiceSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L55-L55", + "specLocation": "inference/_types/Services.ts#L92-L92", "type": { "kind": "user_defined_value" } @@ -96213,7 +96213,7 @@ "name": "TaskSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L57-L57", + "specLocation": "inference/_types/Services.ts#L94-L94", "type": { "kind": "user_defined_value" } @@ -122841,7 +122841,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L41-L53" + "specLocation": "inference/_types/Services.ts#L46-L58" }, { "description": "Configuration options when storing the inference endpoint", @@ -122851,6 +122851,18 @@ "namespace": "inference._types" }, "properties": [ + { + "description": "Chunking configuration object", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, { "description": "The service type", "name": "service", @@ -122888,7 +122900,76 @@ } } ], - "specLocation": "inference/_types/Services.ts#L23-L39" + "specLocation": "inference/_types/Services.ts#L24-L44" + }, + { + "description": "Chunking configuration object", + "inherits": { + "type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + }, + "kind": "interface", + "name": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "name": "max_chunk_size", + "required": false, + "serverDefault": 250, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "name": "overlap", + "required": false, + "serverDefault": 100, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "name": "sentence_overlap", + "required": false, + "serverDefault": 1, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "name": "strategy", + "required": false, + "serverDefault": "sentence", + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L60-L90" }, { "description": "InferenceResult is an aggregation of mutually exclusive variants", diff --git a/output/schema/schema.json b/output/schema/schema.json index cabb171688..0077b2243f 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -144135,6 +144135,75 @@ } } }, + { + "kind": "interface", + "description": "Chunking configuration object", + "inherits": { + "type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + }, + "name": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "name": "max_chunk_size", + "required": false, + "serverDefault": 250, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "name": "overlap", + "required": false, + "serverDefault": 100, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "name": "sentence_overlap", + "required": false, + "serverDefault": 1, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "name": "strategy", + "required": false, + "serverDefault": "sentence", + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L60-L90" + }, { "kind": "interface", "description": "Configuration options when storing the inference endpoint", @@ -144143,6 +144212,18 @@ "namespace": "inference._types" }, "properties": [ + { + "description": "Chunking configuration object", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, { "description": "The service type", "name": "service", @@ -144180,7 +144261,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L23-L39" + "specLocation": "inference/_types/Services.ts#L24-L44" }, { "kind": "interface", @@ -144221,7 +144302,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L41-L53" + "specLocation": "inference/_types/Services.ts#L46-L58" }, { "kind": "interface", @@ -144357,7 +144438,7 @@ "name": "ServiceSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L55-L55", + "specLocation": "inference/_types/Services.ts#L92-L92", "type": { "kind": "user_defined_value" } @@ -144416,7 +144497,7 @@ "name": "TaskSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L57-L57", + "specLocation": "inference/_types/Services.ts#L94-L94", "type": { "kind": "user_defined_value" } diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 884e92805e..81d272a2a1 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -12869,7 +12869,15 @@ export type InferenceDenseByteVector = byte[] export type InferenceDenseVector = float[] +export interface InferenceInferenceChunkingSettings extends InferenceInferenceEndpoint { + max_chunk_size?: integer + overlap?: integer + sentence_overlap?: integer + strategy?: string +} + export interface InferenceInferenceEndpoint { + chunking_settings?: InferenceInferenceChunkingSettings service: string service_settings: InferenceServiceSettings task_settings?: InferenceTaskSettings diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index 52d3c9f7e4..53024633f5 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -18,12 +18,17 @@ */ import { UserDefinedValue } from '@spec_utils/UserDefinedValue' +import { integer } from '@_types/Numeric' import { TaskType } from '../_types/TaskType' /** * Configuration options when storing the inference endpoint */ export class InferenceEndpoint { + /** + * Chunking configuration object + */ + chunking_settings?: InferenceChunkingSettings /** * The service type */ @@ -52,6 +57,38 @@ export class InferenceEndpointInfo extends InferenceEndpoint { task_type: TaskType } +/** + * Chunking configuration object + */ +export class InferenceChunkingSettings extends InferenceEndpoint { + /** + * Specifies the maximum size of a chunk in words + * This value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy) + * @server_default 250 + */ + max_chunk_size?: integer + /** + * Specifies the number of overlapping words for chunks + * Only for `word` chunking strategy + * This value cannot be higher than the half of `max_chunk_size` + * @server_default 100 + */ + overlap?: integer + /** + * Specifies the number of overlapping sentences for chunks + * Only for `sentence` chunking strategy + * It can be either `1` or `0` + * @server_default 1 + */ + sentence_overlap?: integer + /** + * Specifies the chunking strategy + * It could be either `sentence` or `word` + * @server_default sentence + */ + strategy?: string +} + export type ServiceSettings = UserDefinedValue export type TaskSettings = UserDefinedValue