Skip to content

Commit 80f6071

Browse files
authored
Support ExtractByMistral function spec declaration in Python SDK. (#88)
1 parent e5b1dd5 commit 80f6071

File tree

3 files changed

+35
-6
lines changed

3 files changed

+35
-6
lines changed

python/cocoindex/flow.py

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -60,8 +60,15 @@ def _create_data_slice(
6060
def _spec_kind(spec: Any) -> str:
6161
return spec.__class__.__name__
6262

63-
def _spec_dump(spec: Any) -> dict[str, Any]:
64-
return spec.__dict__
63+
def _spec_value_dump(spec: Any) -> Any:
64+
"""Recursively dump a spec object and its nested attributes to a dictionary."""
65+
if hasattr(spec, '__dict__'):
66+
return {k: _spec_value_dump(v) for k, v in spec.__dict__.items()}
67+
elif isinstance(spec, (list, tuple)):
68+
return [_spec_value_dump(item) for item in spec]
69+
elif isinstance(spec, dict):
70+
return {k: _spec_value_dump(v) for k, v in spec.items()}
71+
return spec
6572

6673
T = TypeVar('T')
6774

@@ -161,7 +168,7 @@ def transform(self, fn_spec: op.FunctionSpec, /, name: str | None = None) -> Dat
161168
lambda target_scope, name:
162169
flow_builder_state.engine_flow_builder.transform(
163170
_spec_kind(fn_spec),
164-
_spec_dump(fn_spec),
171+
_spec_value_dump(fn_spec),
165172
args,
166173
target_scope,
167174
flow_builder_state.field_name_builder.build_name(
@@ -252,7 +259,7 @@ def export(self, name: str, target_spec: op.StorageSpec, /, *,
252259
{"field_name": field_name, "metric": metric.value}
253260
for field_name, metric in vector_index]
254261
self._flow_builder_state.engine_flow_builder.export(
255-
name, _spec_kind(target_spec), _spec_dump(target_spec),
262+
name, _spec_kind(target_spec), _spec_value_dump(target_spec),
256263
index_options, self._engine_data_collector)
257264

258265

@@ -293,7 +300,7 @@ def add_source(self, spec: op.SourceSpec, /, name: str | None = None) -> DataSli
293300
self._state,
294301
lambda target_scope, name: self._state.engine_flow_builder.add_source(
295302
_spec_kind(spec),
296-
_spec_dump(spec),
303+
_spec_value_dump(spec),
297304
target_scope,
298305
self._state.field_name_builder.build_name(
299306
name, prefix=_to_snake_case(_spec_kind(spec))+'_'),

python/cocoindex/functions.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
"""All builtin functions."""
2+
from dataclasses import dataclass
23
from typing import Annotated, Any
34

45
import sentence_transformers
@@ -11,6 +12,20 @@ class SplitRecursively(op.FunctionSpec):
1112
chunk_overlap: int
1213
language: str | None = None
1314

15+
@dataclass
class MistralModelSpec:
    """Describe which Mistral model to use."""

    # Identifier of the model.
    model_id: str
    # NOTE(review): presumably the in-situ quantization (ISQ) setting for the
    # model — confirm against the engine side.
    isq_type: str
20+
21+
class ExtractByMistral(op.FunctionSpec):
22+
"""Extract information from a text using a Mistral model."""
23+
24+
model: MistralModelSpec
25+
# Expected to be generated by `cocoindex.typing.encode_enriched_type()`
26+
output_type: dict[str, Any]
27+
instructions: str | None = None
28+
1429
class SentenceTransformerEmbed(op.FunctionSpec):
1530
"""
1631
`SentenceTransformerEmbed` embeds a text into a vector space using the [SentenceTransformer](https://huggingface.co/sentence-transformers) library.

python/cocoindex/typing.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import collections
33
import dataclasses
44
import types
5-
from typing import Annotated, NamedTuple, Any, TypeVar, TYPE_CHECKING
5+
from typing import Annotated, NamedTuple, Any, TypeVar, TYPE_CHECKING, overload
66

77
class Vector(NamedTuple):
88
dim: int | None
@@ -182,6 +182,13 @@ def encode_enriched_type_info(enriched_type_info: AnalyzedTypeInfo) -> dict[str,
182182

183183
return encoded
184184

185+
@overload
186+
def encode_enriched_type(t: None) -> None:
187+
...
188+
189+
@overload
190+
def encode_enriched_type(t: Any) -> dict[str, Any]:
191+
...
185192

186193
def encode_enriched_type(t) -> dict[str, Any] | None:
187194
"""

0 commit comments

Comments
 (0)