diff --git a/docs/docs/core/custom_function.mdx b/docs/docs/core/custom_function.mdx index 841ac2fd0..1f60ed245 100644 --- a/docs/docs/core/custom_function.mdx +++ b/docs/docs/core/custom_function.mdx @@ -148,6 +148,17 @@ Custom functions take the following additional parameters: When the version is changed, the function will be re-executed even if cache is enabled. It's required to be set if `cache` is `True`. +* `arg_relationship: tuple[ArgRelationship, str]`: It specifies the relationship between an input argument and the output, + e.g. `(ArgRelationship.CHUNKS_BASE_TEXT, "content")` means the output is chunks for the text represented by the + input argument with name `content`. + This provides metadata for tools, e.g. CocoInsight. + Currently the following attributes are supported: + + * `ArgRelationship.CHUNKS_BASE_TEXT`: + The output is chunks for the text represented by the input argument. In this case, the output is expected to be a *Table*, in which each row represents a text chunk, and the first column has type *Range*, representing the range of the text chunk. + * `ArgRelationship.EMBEDDING_ORIGIN_TEXT`: The output is an embedding vector for the text represented by the input argument. The output is expected to be a *Vector*. + * `ArgRelationship.RECTS_BASE_IMAGE`: The output is rectangles for the image represented by the input argument. The output is expected to be a *Table*, in which each row represents a rectangle, and the first column has type *Struct*, with fields `min_x`, `min_y`, `max_x`, `max_y` to represent the coordinates of the rectangle. 
+ For example: diff --git a/python/cocoindex/functions.py b/python/cocoindex/functions.py index 3b88d26aa..9b0763c13 100644 --- a/python/cocoindex/functions.py +++ b/python/cocoindex/functions.py @@ -70,7 +70,7 @@ class SentenceTransformerEmbed(op.FunctionSpec): gpu=True, cache=True, behavior_version=1, - arg_relationship=(op.ArgRelationship.VECTOR_ORIGIN_TEXT, "text"), + arg_relationship=(op.ArgRelationship.EMBEDDING_ORIGIN_TEXT, "text"), ) class SentenceTransformerEmbedExecutor: """Executor for SentenceTransformerEmbed.""" diff --git a/python/cocoindex/op.py b/python/cocoindex/op.py index a256027cb..d4bc51e08 100644 --- a/python/cocoindex/op.py +++ b/python/cocoindex/op.py @@ -91,7 +91,7 @@ def __call__( class ArgRelationship(Enum): """Specifies the relationship between an input argument and the output.""" - VECTOR_ORIGIN_TEXT = _COCOINDEX_ATTR_PREFIX + "vector_origin_text" + EMBEDDING_ORIGIN_TEXT = _COCOINDEX_ATTR_PREFIX + "embedding_origin_text" CHUNKS_BASE_TEXT = _COCOINDEX_ATTR_PREFIX + "chunk_base_text" RECTS_BASE_IMAGE = _COCOINDEX_ATTR_PREFIX + "rects_base_image" diff --git a/src/base/field_attrs.rs b/src/base/field_attrs.rs index 1cc80fee8..b4b1a8236 100644 --- a/src/base/field_attrs.rs +++ b/src/base/field_attrs.rs @@ -15,4 +15,4 @@ pub static CONTENT_MIME_TYPE: &str = concatcp!(COCOINDEX_PREFIX, "content_mime_t pub static CHUNK_BASE_TEXT: &str = concatcp!(COCOINDEX_PREFIX, "chunk_base_text"); /// Base text for an embedding vector. -pub static _VECTOR_ORIGIN_TEXT: &str = concatcp!(COCOINDEX_PREFIX, "vector_origin_text"); +pub static _EMBEDDING_ORIGIN_TEXT: &str = concatcp!(COCOINDEX_PREFIX, "embedding_origin_text");