From 15fef28a48ab45b88d3fd0f73885f9127701b77f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 18 Feb 2025 10:46:56 +0100 Subject: [PATCH 1/2] [Inference API] Adds chunking_settings to PUT inference API. --- specification/inference/_types/Services.ts | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index 52d3c9f7e4..c90fec8280 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -19,6 +19,7 @@ import { UserDefinedValue } from '@spec_utils/UserDefinedValue' import { TaskType } from '../_types/TaskType' +import { integer } from '@_types/Numeric' /** * Configuration options when storing the inference endpoint @@ -52,6 +53,37 @@ export class InferenceEndpointInfo extends InferenceEndpoint { task_type: TaskType } +/** + * Chunking configuration object + */ +export class InferenceChunkingSettings extends InferenceEndpoint { + /** + * Specifies the maximum size of a chunk in words + * This value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy) + * @server_default 250 + */ + max_chunk_size?: integer + /** + * Specifies the number of overlapping words for chunks + * Only for `word` chunking strategy + * This value cannot be higher than the half of `max_chunk_size` + * @server_default 100 + */ + overlap?: integer + /** + * Specifies the number of overlapping sentences for chunks + * Only for `sentence` chunking strategy + * It can be either `1` or `0` + * @server_default 1 + */ + sentence_overlap?: integer + /** + * Specifies the chunking strategy + * It could be either `sentence` or `word` + */ + strategy?: string +} + export type ServiceSettings = UserDefinedValue export type TaskSettings = UserDefinedValue From fe84f797f3aff9e19b3fd23a3cb53a2fbdd15da3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 18 Feb 2025 11:28:31 +0100 Subject: [PATCH 2/2] [Inference API] Make contrib. 
--- output/openapi/elasticsearch-openapi.json | 31 +++++++ .../elasticsearch-serverless-openapi.json | 31 +++++++ output/schema/schema-serverless.json | 89 ++++++++++++++++++- output/schema/schema.json | 89 ++++++++++++++++++- output/typescript/types.ts | 8 ++ specification/inference/_types/Services.ts | 15 ++-- 6 files changed, 250 insertions(+), 13 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 6ec158e75c..7344ad8a20 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -73783,6 +73783,9 @@ "inference._types:InferenceEndpoint": { "type": "object", "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, "service": { "description": "The service type", "type": "string" @@ -73799,6 +73802,34 @@ "service_settings" ] }, + "inference._types:InferenceChunkingSettings": { + "allOf": [ + { + "$ref": "#/components/schemas/inference._types:InferenceEndpoint" + }, + { + "type": "object", + "properties": { + "max_chunk_size": { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "type": "number" + }, + "overlap": { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "type": "number" + }, + "sentence_overlap": { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "type": "number" + }, + "strategy": { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "type": "string" + } + } + } + ] + }, "inference._types:ServiceSettings": { "type": "object" }, diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 5761bc6349..233056621c 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -46339,6 +46339,9 @@ "inference._types:InferenceEndpoint": { "type": "object", "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, "service": { "description": "The service type", "type": "string" @@ -46355,6 +46358,34 @@ "service_settings" ] }, + "inference._types:InferenceChunkingSettings": { + "allOf": [ + { + "$ref": "#/components/schemas/inference._types:InferenceEndpoint" + }, + { + "type": "object", + "properties": { + "max_chunk_size": { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "type": "number" + }, + "overlap": { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "type": "number" + }, + "sentence_overlap": { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "type": "number" + }, + "strategy": { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "type": "string" + } + } + } + ] + }, "inference._types:ServiceSettings": { "type": "object" }, diff 
--git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index dd4fafcc1f..ea0be1f2e0 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -97210,7 +97210,7 @@ "name": "ServiceSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L55-L55", + "specLocation": "inference/_types/Services.ts#L92-L92", "type": { "kind": "user_defined_value" } @@ -97248,7 +97248,7 @@ "name": "TaskSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L57-L57", + "specLocation": "inference/_types/Services.ts#L94-L94", "type": { "kind": "user_defined_value" } @@ -123971,7 +123971,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L41-L53" + "specLocation": "inference/_types/Services.ts#L46-L58" }, { "description": "Configuration options when storing the inference endpoint", @@ -123981,6 +123981,18 @@ "namespace": "inference._types" }, "properties": [ + { + "description": "Chunking configuration object", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, { "description": "The service type", "name": "service", @@ -124018,7 +124030,76 @@ } } ], - "specLocation": "inference/_types/Services.ts#L23-L39" + "specLocation": "inference/_types/Services.ts#L24-L44" + }, + { + "description": "Chunking configuration object", + "inherits": { + "type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + }, + "kind": "interface", + "name": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "name": "max_chunk_size", + "required": false, + "serverDefault": 250, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "name": "overlap", + "required": false, + "serverDefault": 100, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "name": "sentence_overlap", + "required": false, + "serverDefault": 1, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "name": "strategy", + "required": false, + "serverDefault": "sentence", + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L60-L90" }, { "description": "InferenceResult is an aggregation of mutually exclusive variants", diff --git a/output/schema/schema.json b/output/schema/schema.json index ad9f981b8c..a4e24687f2 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -146080,6 +146080,75 @@ } } }, + { + "kind": "interface", + "description": "Chunking configuration object", + "inherits": { + "type": { + "name": "InferenceEndpoint", + 
"namespace": "inference._types" + } + }, + "name": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "name": "max_chunk_size", + "required": false, + "serverDefault": 250, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "name": "overlap", + "required": false, + "serverDefault": 100, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "name": "sentence_overlap", + "required": false, + "serverDefault": 1, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "name": "strategy", + "required": false, + "serverDefault": "sentence", + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L60-L90" + }, { "kind": "interface", "description": "Configuration options when storing the inference endpoint", @@ -146088,6 +146157,18 @@ "namespace": "inference._types" }, "properties": [ + { + "description": "Chunking configuration object", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, { "description": "The service type", "name": "service", @@ -146125,7 +146206,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L23-L39" + "specLocation": "inference/_types/Services.ts#L24-L44" }, { "kind": "interface", @@ -146166,7 +146247,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L41-L53" + "specLocation": "inference/_types/Services.ts#L46-L58" }, { "kind": "interface", @@ -146316,7 +146397,7 @@ "name": "ServiceSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L55-L55", + "specLocation": "inference/_types/Services.ts#L92-L92", "type": { "kind": "user_defined_value" } @@ -146375,7 +146456,7 @@ "name": "TaskSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L57-L57", + "specLocation": "inference/_types/Services.ts#L94-L94", "type": { "kind": "user_defined_value" } diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 5db4dbcfad..f431a37def 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13001,7 +13001,15 @@ export type InferenceDenseByteVector = byte[] export type InferenceDenseVector = float[] +export interface InferenceInferenceChunkingSettings extends InferenceInferenceEndpoint { + max_chunk_size?: integer + overlap?: integer + sentence_overlap?: integer + strategy?: string +} + export interface InferenceInferenceEndpoint { + chunking_settings?: InferenceInferenceChunkingSettings service: string service_settings: InferenceServiceSettings task_settings?: InferenceTaskSettings diff --git 
a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index c90fec8280..53024633f5 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -18,13 +18,17 @@ */ import { UserDefinedValue } from '@spec_utils/UserDefinedValue' -import { TaskType } from '../_types/TaskType' import { integer } from '@_types/Numeric' +import { TaskType } from '../_types/TaskType' /** * Configuration options when storing the inference endpoint */ export class InferenceEndpoint { + /** + * Chunking configuration object + */ + chunking_settings?: InferenceChunkingSettings /** * The service type */ @@ -77,10 +81,11 @@ export class InferenceChunkingSettings extends InferenceEndpoint { * @server_default 1 */ sentence_overlap?: integer - /** - * Specifies the chunking strategy - * It could be either `sentence` or `word` - */ + /** + * Specifies the chunking strategy + * It could be either `sentence` or `word` + * @server_default sentence + */ strategy?: string }
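
Note (not part of the patches above): a minimal TypeScript sketch of how a request body carrying the new `chunking_settings` object might be assembled. The local `ChunkingSettings` interface merely mirrors the generated `InferenceInferenceChunkingSettings` shape from output/typescript/types.ts; the service name, model settings, and endpoint path mentioned in the comments are illustrative placeholders, not taken from the patches.

// Local stand-in for the generated InferenceInferenceChunkingSettings interface
// (assumed shape; the real type lives in output/typescript/types.ts).
interface ChunkingSettings {
  max_chunk_size?: number   // maximum chunk size in words; server default 250
  overlap?: number          // word overlap between chunks; only for the `word` strategy; server default 100
  sentence_overlap?: number // sentence overlap, 0 or 1; only for the `sentence` strategy; server default 1
  strategy?: string         // `sentence` or `word`; server default `sentence`
}

// Hypothetical body for a PUT _inference/<task_type>/<inference_id> request.
// The service and service_settings values are placeholders for whatever service is configured.
const chunking: ChunkingSettings = {
  strategy: 'sentence',
  max_chunk_size: 250,
  sentence_overlap: 1,
}

const body = {
  service: 'elasticsearch',
  service_settings: { model_id: '.multilingual-e5-small', num_allocations: 1, num_threads: 1 },
  chunking_settings: chunking,
}

// Print the JSON payload that would be sent to the inference API.
console.log(JSON.stringify(body, null, 2))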