Skip to content

Commit c81f68d

Browse files
authored
Leverage pythonize to bypass JSON serialization in Python<->Rust (#45)
1 parent a1818af commit c81f68d

File tree

11 files changed

+60
-66
lines changed

11 files changed

+60
-66
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,4 @@ itertools = "0.14.0"
3737
derivative = "2.2.0"
3838
async-lock = "3.4.0"
3939
hex = "0.4.3"
40+
pythonize = "0.23.0"

python/cocoindex/flow.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from __future__ import annotations
66

77
import re
8-
import json
98
import inspect
109
from typing import Any, Callable, Sequence, TypeVar
1110
from threading import Lock
@@ -61,8 +60,8 @@ def _create_data_slice(
6160
def _spec_kind(spec: Any) -> str:
6261
return spec.__class__.__name__
6362

64-
def _spec_json_dump(spec: Any) -> str:
65-
return json.dumps(spec.__dict__)
63+
def _spec_dump(spec: Any) -> dict[str, Any]:
64+
return spec.__dict__
6665

6766
T = TypeVar('T')
6867

@@ -162,7 +161,7 @@ def transform(self, fn_spec: op.FunctionSpec, /, name: str | None = None) -> Dat
162161
lambda target_scope, name:
163162
flow_builder_state.engine_flow_builder.transform(
164163
_spec_kind(fn_spec),
165-
_spec_json_dump(fn_spec),
164+
_spec_dump(fn_spec),
166165
args,
167166
target_scope,
168167
flow_builder_state.field_name_builder.build_name(
@@ -253,8 +252,8 @@ def export(self, name: str, target_spec: op.StorageSpec, /, *,
253252
{"field_name": field_name, "metric": metric.value}
254253
for field_name, metric in vector_index]
255254
self._flow_builder_state.engine_flow_builder.export(
256-
name, _spec_kind(target_spec), _spec_json_dump(target_spec),
257-
json.dumps(index_options), self._engine_data_collector)
255+
name, _spec_kind(target_spec), _spec_dump(target_spec),
256+
index_options, self._engine_data_collector)
258257

259258

260259
_flow_name_builder = _NameBuilder()
@@ -294,7 +293,7 @@ def add_source(self, spec: op.SourceSpec, /, name: str | None = None) -> DataSli
294293
self._state,
295294
lambda target_scope, name: self._state.engine_flow_builder.add_source(
296295
_spec_kind(spec),
297-
_spec_json_dump(spec),
296+
_spec_dump(spec),
298297
target_scope,
299298
self._state.field_name_builder.build_name(
300299
name, prefix=_to_snake_case(_spec_kind(spec))+'_'),

python/cocoindex/functions.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""All builtin functions."""
22
from typing import Annotated, Any
33

4-
import json
54
import sentence_transformers
65
from .typing import Float32, Vector, TypeAttr
76
from . import op
@@ -31,11 +30,11 @@ class SentenceTransformerEmbedExecutor:
3130
spec: SentenceTransformerEmbed
3231
_model: sentence_transformers.SentenceTransformer
3332

34-
def analyze(self, text = None):
33+
def analyze(self, text):
3534
args = self.spec.args or {}
3635
self._model = sentence_transformers.SentenceTransformer(self.spec.model, **args)
3736
dim = self._model.get_sentence_embedding_dimension()
38-
return Annotated[list[Float32], Vector(dim=dim), TypeAttr("cocoindex.io/vector_origin_text", json.loads(text.analyzed_value))]
37+
return Annotated[list[Float32], Vector(dim=dim), TypeAttr("cocoindex.io/vector_origin_text", text.analyzed_value)]
3938

4039
def __call__(self, text: str) -> list[Float32]:
4140
return self._model.encode(text).tolist()

python/cocoindex/lib.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def from_env(cls) -> Self:
3535

3636
def init(settings: Settings):
3737
"""Initialize the cocoindex library."""
38-
_engine.init(json.dumps(settings.__dict__))
38+
_engine.init(settings.__dict__)
3939

4040
@dataclass
4141
class ServerSettings:
@@ -62,7 +62,7 @@ def start_server(settings: ServerSettings):
6262
"""Start the cocoindex server."""
6363
flow.ensure_all_flows_built()
6464
query.ensure_all_handlers_built()
65-
_engine.start_server(json.dumps(settings.__dict__))
65+
_engine.start_server(settings.__dict__)
6666

6767
def stop():
6868
"""Stop the cocoindex library."""

python/cocoindex/op.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
Facilities for defining cocoindex operations.
33
"""
44
import inspect
5-
import json
65

7-
from typing import get_type_hints, Protocol, Callable, dataclass_transform
6+
from typing import get_type_hints, Protocol, Any, Callable, dataclass_transform
87
from dataclasses import dataclass
98
from enum import Enum
109
from threading import Lock
@@ -54,8 +53,8 @@ def __init__(self, spec_cls: type, executor_cls: type):
5453
self._spec_cls = spec_cls
5554
self._executor_cls = executor_cls
5655

57-
def __call__(self, spec_json: str, *args, **kwargs):
58-
spec = self._spec_cls(**json.loads(spec_json))
56+
def __call__(self, spec: dict[str, Any], *args, **kwargs):
57+
spec = self._spec_cls(**spec)
5958
executor = self._executor_cls(spec)
6059
result_type = executor.analyze(*args, **kwargs)
6160
return (dump_type(result_type), executor)

python/cocoindex/query.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from typing import Callable, Any
22
from dataclasses import dataclass
33
from threading import Lock
4-
import json
54

65
from . import flow as fl
76
from . import vector
@@ -52,7 +51,7 @@ def _lazy_handler() -> _engine.SimpleSemanticsQueryHandler:
5251
engine_handler = _engine.SimpleSemanticsQueryHandler(
5352
flow.internal_flow(), target_name,
5453
fl.TransientFlow(query_transform_flow, [str]).internal_flow(),
55-
json.dumps(default_similarity_metric.value))
54+
default_similarity_metric.value)
5655
engine_handler.register_query_handler(name)
5756
return engine_handler
5857
self._lazy_query_handler = _lazy_handler
@@ -71,11 +70,9 @@ def search(self, query: str, limit: int, vector_field_name: str | None = None,
7170
"""
7271
Search the index with the given query, limit, vector field name, and similarity metric.
7372
"""
74-
internal_results_json, internal_info_json = self.internal_handler().search(
73+
internal_results, internal_info = self.internal_handler().search(
7574
query, limit, vector_field_name,
7675
similarity_matric.value if similarity_matric is not None else None)
77-
internal_results = json.loads(internal_results_json)
78-
internal_info = json.loads(internal_info_json)
7976
fields = [field['name'] for field in internal_results['fields']]
8077
results = [QueryResult(data=dict(zip(fields, result['data'])), score=result['score']) for result in internal_results['results']]
8178
info = SimpleSemanticsQueryInfo(

python/cocoindex/setup.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import json
21
from typing import NamedTuple
32

43
from . import flow
@@ -9,7 +8,7 @@ class CheckSetupStatusOptions(NamedTuple):
98

109
def check_setup_status(options: CheckSetupStatusOptions) -> _engine.SetupStatusCheck:
1110
flow.ensure_all_flows_built()
12-
return _engine.check_setup_status(json.dumps(options))
11+
return _engine.check_setup_status(options)
1312

1413
def apply_setup_changes(status_check: _engine.SetupStatusCheck):
1514
_engine.apply_setup_changes(status_check)

python/cocoindex/typing.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import json
21
import typing
32
import collections
43
from typing import Annotated, NamedTuple, Any
@@ -67,7 +66,7 @@ def _basic_type_to_json_value(t, metadata):
6766

6867
return type_json
6968

70-
def _enriched_type_to_json_value(t):
69+
def _enriched_type_to_json_value(t) -> dict[str, Any] | None:
7170
if t is None:
7271
return None
7372
t, metadata = _get_origin_type_and_metadata(t)
@@ -83,8 +82,8 @@ def _enriched_type_to_json_value(t):
8382
return enriched_type_json
8483

8584

86-
def dump_type(t) -> str:
85+
def dump_type(t) -> dict[str, Any] | None:
8786
"""
8887
Convert a Python type to a CocoIndex's type in JSON.
8988
"""
90-
return json.dumps(_enriched_type_to_json_value(t))
89+
return _enriched_type_to_json_value(t)

src/builder/flow_builder.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ impl FlowBuilder {
386386
pub fn add_source(
387387
&mut self,
388388
kind: String,
389-
op_spec: py::Json<serde_json::Map<String, serde_json::Value>>,
389+
op_spec: py::Pythonized<serde_json::Map<String, serde_json::Value>>,
390390
target_scope: Option<DataScopeRef>,
391391
name: String,
392392
) -> PyResult<DataSlice> {
@@ -423,7 +423,7 @@ impl FlowBuilder {
423423
pub fn add_direct_input(
424424
&mut self,
425425
name: String,
426-
value_type: py::Json<schema::EnrichedValueType>,
426+
value_type: py::Pythonized<schema::EnrichedValueType>,
427427
) -> PyResult<DataSlice> {
428428
let mut root_data_scope = self.root_data_scope.lock().unwrap();
429429
root_data_scope
@@ -453,7 +453,7 @@ impl FlowBuilder {
453453
pub fn transform(
454454
&mut self,
455455
kind: String,
456-
op_spec: py::Json<serde_json::Map<String, serde_json::Value>>,
456+
op_spec: py::Pythonized<serde_json::Map<String, serde_json::Value>>,
457457
args: Vec<(DataSlice, Option<String>)>,
458458
target_scope: Option<DataScopeRef>,
459459
name: String,
@@ -555,8 +555,8 @@ impl FlowBuilder {
555555
&mut self,
556556
name: String,
557557
kind: String,
558-
op_spec: py::Json<serde_json::Map<String, serde_json::Value>>,
559-
index_options: py::Json<spec::IndexOptions>,
558+
op_spec: py::Pythonized<serde_json::Map<String, serde_json::Value>>,
559+
index_options: py::Pythonized<spec::IndexOptions>,
560560
input: &DataCollector,
561561
) -> PyResult<()> {
562562
let spec = spec::OpSpec {

src/ops/py_factory.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -113,26 +113,26 @@ fn value_from_py_object<'py>(
113113

114114
#[pyclass(name = "OpArgSchema")]
115115
pub struct PyOpArgSchema {
116-
value_type: crate::py::Json<schema::EnrichedValueType>,
117-
analyzed_value: crate::py::Json<plan::AnalyzedValueMapping>,
116+
value_type: crate::py::Pythonized<schema::EnrichedValueType>,
117+
analyzed_value: crate::py::Pythonized<plan::AnalyzedValueMapping>,
118118
}
119119

120120
#[pymethods]
121121
impl PyOpArgSchema {
122122
#[getter]
123-
fn value_type(&self) -> &crate::py::Json<schema::EnrichedValueType> {
123+
fn value_type(&self) -> &crate::py::Pythonized<schema::EnrichedValueType> {
124124
&self.value_type
125125
}
126126

127127
#[getter]
128-
fn analyzed_value(&self) -> &crate::py::Json<plan::AnalyzedValueMapping> {
128+
fn analyzed_value(&self) -> &crate::py::Pythonized<plan::AnalyzedValueMapping> {
129129
&self.analyzed_value
130130
}
131131

132132
fn validate_arg(
133133
&self,
134134
name: &str,
135-
typ: crate::py::Json<schema::EnrichedValueType>,
135+
typ: crate::py::Pythonized<schema::EnrichedValueType>,
136136
) -> PyResult<()> {
137137
if self.value_type.0.typ != typ.0.typ {
138138
return Err(PyException::new_err(format!(
@@ -222,13 +222,13 @@ impl SimpleFunctionFactory for PyFunctionFactory {
222222
)> {
223223
let (result_type, executor, kw_args_names, num_positional_args) =
224224
Python::with_gil(|py| -> anyhow::Result<_> {
225-
let mut args = vec![crate::py::Json(spec).into_py_any(py)?];
225+
let mut args = vec![crate::py::Pythonized(spec).into_py_any(py)?];
226226
let mut kwargs = vec![];
227227
let mut num_positional_args = 0;
228228
for arg in input_schema.into_iter() {
229229
let py_arg_schema = PyOpArgSchema {
230-
value_type: crate::py::Json(arg.value_type.clone()),
231-
analyzed_value: crate::py::Json(arg.analyzed_value.clone()),
230+
value_type: crate::py::Pythonized(arg.value_type.clone()),
231+
analyzed_value: crate::py::Pythonized(arg.analyzed_value.clone()),
232232
};
233233
match arg.name.0 {
234234
Some(name) => {
@@ -251,7 +251,7 @@ impl SimpleFunctionFactory for PyFunctionFactory {
251251
Some(&kwargs.into_py_dict(py)?),
252252
)?;
253253
let (result_type, executor) = result
254-
.extract::<(crate::py::Json<schema::EnrichedValueType>, Py<PyAny>)>(py)?;
254+
.extract::<(crate::py::Pythonized<schema::EnrichedValueType>, Py<PyAny>)>(py)?;
255255
Ok((
256256
result_type.into_inner(),
257257
executor,

0 commit comments

Comments
 (0)