Skip to content

Commit dc25643

Browse files
authored
Python SDK exposes refresh_options for add_source. (#241)
1 parent 42ff48a commit dc25643

File tree

3 files changed

+26
-4
lines changed

3 files changed

+26
-4
lines changed

examples/gdrive_text_embedding/main.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from dotenv import load_dotenv
22

33
import cocoindex
4+
import datetime
45
import os
56

67
@cocoindex.flow_def(name="GoogleDriveTextEmbedding")
@@ -14,7 +15,9 @@ def gdrive_text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope:
1415
data_scope["documents"] = flow_builder.add_source(
1516
cocoindex.sources.GoogleDrive(
1617
service_account_credential_path=credential_path,
17-
root_folder_ids=root_folder_ids))
18+
root_folder_ids=root_folder_ids),
19+
refresh_options=cocoindex.SourceRefreshOptions(
20+
refresh_interval=datetime.timedelta(minutes=1)))
1821

1922
doc_embeddings = data_scope.add_collector()
2023

python/cocoindex/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
from . import flow, functions, query, sources, storages, cli
55
from .flow import FlowBuilder, DataScope, DataSlice, Flow, flow_def
6-
from .flow import EvaluateAndDumpOptions, GeneratedField
6+
from .flow import EvaluateAndDumpOptions, GeneratedField, SourceRefreshOptions
77
from .llm import LlmSpec, LlmApiType
88
from .vector import VectorSimilarityMetric
99
from .lib import *

python/cocoindex/flow.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import re
88
import inspect
9+
import datetime
910
from typing import Any, Callable, Sequence, TypeVar, get_origin
1011
from threading import Lock
1112
from enum import Enum
@@ -64,10 +65,17 @@ def _spec_kind(spec: Any) -> str:
6465

6566
def _dump_engine_object(v: Any) -> Any:
6667
"""Recursively dump an object for engine. Engine side uses `Pythonized` to catch."""
67-
if isinstance(v, type) or get_origin(v) is not None:
68+
if v is None:
69+
return None
70+
elif isinstance(v, type) or get_origin(v) is not None:
6871
return encode_enriched_type(v)
6972
elif isinstance(v, Enum):
7073
return v.value
74+
elif isinstance(v, datetime.timedelta):
75+
total_secs = v.total_seconds()
76+
secs = int(total_secs)
77+
nanos = int((total_secs - secs) * 1e9)
78+
return {'secs': secs, 'nanos': nanos}
7179
elif hasattr(v, '__dict__'):
7280
return {k: _dump_engine_object(v) for k, v in v.__dict__.items()}
7381
elif isinstance(v, (list, tuple)):
@@ -314,6 +322,13 @@ def get_data_slice(self, v: Any) -> _engine.DataSlice:
314322
return v._state.engine_data_slice
315323
return self.engine_flow_builder.constant(encode_enriched_type(type(v)), v)
316324

325+
@dataclass
326+
class SourceRefreshOptions:
327+
"""
328+
Options for refreshing a source.
329+
"""
330+
refresh_interval: datetime.timedelta | None = None
331+
317332
class FlowBuilder:
318333
"""
319334
A flow builder is used to build a flow.
@@ -329,7 +344,10 @@ def __str__(self):
329344
def __repr__(self):
330345
return repr(self._state.engine_flow_builder)
331346

332-
def add_source(self, spec: op.SourceSpec, /, name: str | None = None) -> DataSlice:
347+
def add_source(self, spec: op.SourceSpec, /, *,
348+
name: str | None = None,
349+
refresh_options: SourceRefreshOptions | None = None,
350+
) -> DataSlice:
333351
"""
334352
Add a source to the flow.
335353
"""
@@ -341,6 +359,7 @@ def add_source(self, spec: op.SourceSpec, /, name: str | None = None) -> DataSli
341359
target_scope,
342360
self._state.field_name_builder.build_name(
343361
name, prefix=_to_snake_case(_spec_kind(spec))+'_'),
362+
_dump_engine_object(refresh_options),
344363
),
345364
name
346365
)

0 commit comments

Comments
 (0)