diff --git a/output/schema/schema.json b/output/schema/schema.json index 9f7672a096..6069ef2df0 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -50602,6 +50602,40 @@ } } }, + { + "kind": "interface", + "name": { + "name": "ChunkRescorer", + "namespace": "_types" + }, + "properties": [ + { + "description": "The number of chunks per document to evaluate for reranking.", + "name": "size", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Chunking settings to apply.", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "ChunkRescorerChunkingSettings", + "namespace": "_types.mapping" + } + } + } + ], + "specLocation": "_types/Retriever.ts#L176-L181" + }, { "kind": "type_alias", "name": { @@ -53682,7 +53716,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L85-L89" + "specLocation": "_types/Retriever.ts#L86-L90" }, { "kind": "type_alias", @@ -53938,7 +53972,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L115-L133" + "specLocation": "_types/Retriever.ts#L116-L134" }, { "kind": "interface", @@ -54241,7 +54275,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L68-L75" + "specLocation": "_types/Retriever.ts#L69-L76" }, { "kind": "type_alias", @@ -55283,7 +55317,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L77-L83" + "specLocation": "_types/Retriever.ts#L78-L84" }, { "kind": "type_alias", @@ -55668,7 +55702,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L135-L144" + "specLocation": "_types/Retriever.ts#L136-L145" }, { "kind": "interface", @@ -56076,7 +56110,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L62-L66" + "specLocation": "_types/Retriever.ts#L63-L67" }, { "kind": "enum", @@ -56197,7 +56231,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L53-L60" + "specLocation": "_types/Retriever.ts#L54-L61" }, { "kind": "interface", @@ -56303,7 +56337,7 @@ } } ], - 
"specLocation": "_types/Retriever.ts#L28-L51", + "specLocation": "_types/Retriever.ts#L29-L52", "variants": { "kind": "container" } @@ -56436,7 +56470,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L159-L168" + "specLocation": "_types/Retriever.ts#L165-L174" }, { "kind": "type_alias", @@ -56511,7 +56545,7 @@ "name": "ScoreNormalizer", "namespace": "_types" }, - "specLocation": "_types/Retriever.ts#L91-L95" + "specLocation": "_types/Retriever.ts#L92-L96" }, { "kind": "interface", @@ -58055,7 +58089,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L97-L100" + "specLocation": "_types/Retriever.ts#L98-L101" }, { "kind": "interface", @@ -58131,7 +58165,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L102-L113" + "specLocation": "_types/Retriever.ts#L103-L114" }, { "kind": "interface", @@ -58475,7 +58509,7 @@ } }, { - "description": "The text snippet used as the basis for similarity comparison", + "description": "The text snippet used as the basis for similarity comparison.", "name": "inference_text", "required": true, "type": { @@ -58487,7 +58521,7 @@ } }, { - "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text", + "description": "The document field to be used for text similarity comparisons. 
This field should contain the text that will be evaluated against the inference_text.", "name": "field", "required": true, "type": { @@ -58497,9 +58531,30 @@ "namespace": "_builtins" } } + }, + { + "availability": { + "serverless": { + "stability": "beta" + }, + "stack": { + "since": "9.2.0", + "stability": "beta" + } + }, + "description": "Whether to rescore on only the best matching chunks.", + "name": "chunk_rescorer", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "ChunkRescorer", + "namespace": "_types" + } + } } ], - "specLocation": "_types/Retriever.ts#L146-L157" + "specLocation": "_types/Retriever.ts#L147-L163" }, { "kind": "enum", @@ -82463,6 +82518,117 @@ ], "specLocation": "_types/mapping/core.ts#L188-L191" }, + { + "kind": "interface", + "attachedBehaviors": [ + "OverloadOf" + ], + "behaviors": [ + { + "generics": [ + { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + ], + "type": { + "name": "OverloadOf", + "namespace": "_spec_utils" + } + } + ], + "name": { + "name": "ChunkRescorerChunkingSettings", + "namespace": "_types.mapping" + }, + "properties": [ + { + "description": "The chunking strategy: `sentence`, `word`, `none` or `recursive`.\n\n * If `strategy` is set to `recursive`, you must also specify:\n\n- `max_chunk_size`\n- either `separators` or`separator_group`\n\nLearn more about different chunking strategies in the linked documentation.", + "extDocId": "chunking-strategies", + "extDocUrl": "https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#chunking-strategies", + "name": "strategy", + "required": false, + "serverDefault": "sentence", + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "Only applicable to the `recursive` strategy and required when using it.\n\nSets a predefined list of separators in the saved chunking settings based on the 
selected text type.\nValues can be `markdown` or `plaintext`.\n\nUsing this parameter is an alternative to manually specifying a custom `separators` list.", + "name": "separator_group", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "Only applicable to the `recursive` strategy and required when using it.\n\nA list of strings used as possible split points when chunking text.\n\nEach string can be a plain string or a regular expression (regex) pattern.\nThe system tries each separator in order to split the text, starting from the first item in the list.\n\nAfter splitting, it attempts to recombine smaller pieces into larger chunks that stay within\nthe `max_chunk_size` limit, to reduce the total number of chunks generated.", + "name": "separators", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + }, + { + "description": "The maximum size of a chunk in words.\nThis value cannot be lower than `20` (for `sentence` strategy) or `10` (for `word` strategy).\nThis value should not exceed the window size for the associated model.", + "name": "max_chunk_size", + "required": true, + "serverDefault": 250, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The number of overlapping words for chunks.\nIt is applicable only to a `word` chunking strategy.\nThis value cannot be higher than half the `max_chunk_size` value.", + "name": "overlap", + "required": false, + "serverDefault": 100, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The number of overlapping sentences for chunks.\nIt is applicable only for a `sentence` chunking strategy.\nIt can be either `1` or `0`.", + "name": "sentence_overlap", + 
"required": false, + "serverDefault": 1, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "_types/mapping/ChunkingSettings.ts#L38-L50" + }, { "kind": "interface", "attachedBehaviors": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index eaf4c86ca9..aae8e2050b 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -2222,6 +2222,11 @@ export type Bytes = 'b' | 'kb' | 'mb' | 'gb' | 'tb' | 'pb' export type CategoryId = string +export interface ChunkRescorer { + size?: integer + chunking_settings?: MappingChunkRescorerChunkingSettings +} + export type ClusterAlias = string export interface ClusterDetails { @@ -3020,6 +3025,7 @@ export interface TextSimilarityReranker extends RetrieverBase { inference_id?: string inference_text: string field: string + chunk_rescorer?: ChunkRescorer } export type ThreadType = 'cpu' | 'wait' | 'block' | 'gpu' | 'mem' @@ -5589,6 +5595,15 @@ export interface MappingByteNumberProperty extends MappingNumberPropertyBase { null_value?: byte } +export interface MappingChunkRescorerChunkingSettings { + strategy?: string + separator_group?: string + separators?: string[] + max_chunk_size: integer + overlap?: integer + sentence_overlap?: integer +} + export interface MappingChunkingSettings { strategy: string separator_group?: string diff --git a/specification/_types/Retriever.ts b/specification/_types/Retriever.ts index 0e4f4f4294..7ab388bc35 100644 --- a/specification/_types/Retriever.ts +++ b/specification/_types/Retriever.ts @@ -23,6 +23,7 @@ import { FieldCollapse } from '@global/search/_types/FieldCollapse' import { Rescore } from '@global/search/_types/rescoring' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' import { Id, IndexName } from './common' +import { ChunkRescorerChunkingSettings } from './mapping/ChunkingSettings' import { QueryContainer } from './query_dsl/abstractions' /** @@ -150,10 +151,15 
@@ export class TextSimilarityReranker extends RetrieverBase { rank_window_size?: integer /** Unique identifier of the inference endpoint created using the inference API. */ inference_id?: string - /** The text snippet used as the basis for similarity comparison */ + /** The text snippet used as the basis for similarity comparison. */ inference_text: string - /** The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text */ + /** The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text. */ field: string + /** Whether to rescore on only the best matching chunks. + * @availability stack since=9.2.0 stability=beta + * @availability serverless stability=beta + */ + chunk_rescorer?: ChunkRescorer } export class RuleRetriever extends RetrieverBase { @@ -166,3 +172,10 @@ export class RuleRetriever extends RetrieverBase { /** This value determines the size of the individual result set. */ rank_window_size?: integer } + +export class ChunkRescorer { + /** The number of chunks per document to evaluate for reranking. */ + size?: integer + /** Chunking settings to apply. */ + chunking_settings?: ChunkRescorerChunkingSettings +} diff --git a/specification/_types/mapping/ChunkingSettings.ts b/specification/_types/mapping/ChunkingSettings.ts index 75454d0956..6f6efdcec1 100644 --- a/specification/_types/mapping/ChunkingSettings.ts +++ b/specification/_types/mapping/ChunkingSettings.ts @@ -34,3 +34,19 @@ export class ChunkingSettings implements OverloadOf { sentence_overlap?: integer } + +export class ChunkRescorerChunkingSettings + implements OverloadOf +{ + strategy?: string + + separator_group?: string + + separators?: string[] + + max_chunk_size: integer + + overlap?: integer + + sentence_overlap?: integer +}