Skip to content

Commit 2c28bed

Browse files
committed
feat: update to use standalone cocoindex cli
1 parent 62a714e commit 2c28bed

File tree

3 files changed

+147
-66
lines changed

3 files changed

+147
-66
lines changed

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ dependencies = ["sentence-transformers>=3.3.1", "click>=8.1.8", "rich>=14.0.0"]
1313
license = "Apache-2.0"
1414
urls = { Homepage = "https://cocoindex.io/" }
1515

16+
[project.scripts]
17+
cocoindex = "cocoindex.cli:cli"
18+
1619
[tool.maturin]
1720
bindings = "pyo3"
1821
python-source = "python"

python/cocoindex/cli.py

Lines changed: 121 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,90 @@
11
import click
22
import datetime
3+
import sys
4+
import importlib.util
5+
import os
6+
import atexit
37

48
from rich.console import Console
59
from rich.table import Table
610

7-
from . import flow, lib, setting
11+
from . import flow, lib, setting, query
812
from .setup import sync_setup, drop_setup, flow_names_with_setup, apply_setup_changes
913

14+
# NOTE: ServerSettings is created lazily inside the `server` command (not at import time), as environment variables may be loaded from files, etc.
15+
COCOINDEX_HOST = 'https://cocoindex.io'
16+
17+
def _load_user_app(app_path: str):
    """Load the user's application file as a module and return it.

    The file's directory is placed at the front of ``sys.path`` while the
    module executes (so the app can import sibling modules), and ``sys.path``
    is restored afterwards whether or not loading succeeded.

    Args:
        app_path: Path to the Python file to load. An empty value is treated
            as an internal error: a message is printed to stderr and the
            process exits with status 1.

    Returns:
        The executed module, which is also registered in ``sys.modules``
        under the file's base name (without extension).

    Raises:
        click.ClickException: If no import spec/loader can be created for the
            file, or if executing the module raises an exception.
    """
    if not app_path:
        click.echo("Internal Error: Application path not provided.", err=True)
        sys.exit(1)

    app_path = os.path.abspath(app_path)
    app_dir = os.path.dirname(app_path)
    module_name = os.path.splitext(os.path.basename(app_path))[0]

    original_sys_path = list(sys.path)
    if app_dir not in sys.path:
        sys.path.insert(0, app_dir)

    try:
        spec = importlib.util.spec_from_file_location(module_name, app_path)
        # spec.loader may legitimately be None; check it before dereferencing.
        if spec is None or spec.loader is None:
            raise ImportError(f"Could not load spec for file: {app_path}")
        module = importlib.util.module_from_spec(spec)

        # Register before executing so the module can be found by name while
        # it runs, but remember what was there so a failure can be undone.
        previous = sys.modules.get(spec.name)
        sys.modules[spec.name] = module
        try:
            spec.loader.exec_module(module)
        except BaseException:
            # Do not leave a half-initialised module importable by name.
            if previous is None:
                sys.modules.pop(spec.name, None)
            else:
                sys.modules[spec.name] = previous
            raise
        return module
    except Exception as e:
        raise click.ClickException(
            f"Failed importing application module '{os.path.basename(app_path)}': {e}"
        ) from e
    finally:
        sys.path = original_sys_path
43+
44+
def _ensure_flows_and_handlers_built():
    """Build all flows and query handlers; exit the process on failure.

    Calls ``flow.ensure_all_flows_built()`` followed by
    ``query.ensure_all_handlers_built()``. If either raises, the error is
    reported on stderr and the process exits with status 1.
    """
    try:
        flow.ensure_all_flows_built()
        query.ensure_all_handlers_built()
    except Exception as e:
        # Plain literal: the message has no placeholders, so no f-prefix.
        click.echo("\nError: Failed processing flows/handlers from application.", err=True)
        click.echo(f"Reason: {e}", err=True)
        sys.exit(1)
53+
1054
@click.group()
55+
@click.version_option(package_name="cocoindex", message="%(prog)s version %(version)s")
1156
def cli():
1257
"""
13-
CLI for Cocoindex.
58+
CLI for Cocoindex. Requires --app for most commands.
1459
"""
60+
try:
61+
settings = setting.Settings.from_env()
62+
lib.init(settings)
63+
atexit.register(lib.stop)
64+
except Exception as e:
65+
raise click.ClickException(f"Failed to initialize CocoIndex library: {e}")
1566

1667
@cli.command()
68+
@click.option(
69+
'--app', 'app_path', required=False,
70+
type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
71+
help="Path to the Python file defining flows."
72+
)
1773
@click.option(
1874
"-a", "--all", "show_all", is_flag=True, show_default=True, default=False,
1975
help="Also show all flows with persisted setup, even if not defined in the current process.")
20-
def ls(show_all: bool):
76+
def ls(app_path: str | None, show_all: bool):
2177
"""
2278
List all flows.
2379
"""
24-
current_flow_names = flow.flow_names()
80+
current_flow_names = set()
81+
82+
if app_path:
83+
_load_user_app(app_path)
84+
current_flow_names = set(flow.flow_names())
85+
elif not show_all:
86+
raise click.UsageError("The --app <path/to/app.py> option is required unless using --all.")
87+
2588
persisted_flow_names = flow_names_with_setup()
2689
remaining_persisted_flow_names = set(persisted_flow_names)
2790

@@ -52,21 +115,28 @@ def ls(show_all: bool):
52115
click.echo(' [?]: Flows with persisted setup, but not in the current process.')
53116

54117
@cli.command()
118+
@click.option(
119+
'--app', 'app_path', required=True,
120+
type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
121+
help="Path to the Python file defining the flow."
122+
)
55123
@click.argument("flow_name", type=str, required=False)
56124
@click.option("--color/--no-color", default=True, help="Enable or disable colored output.")
57125
@click.option("--verbose", is_flag=True, help="Show verbose output with full details.")
58-
def show(flow_name: str | None, color: bool, verbose: bool):
126+
def show(app_path: str, flow_name: str | None, color: bool, verbose: bool):
59127
"""
60-
Show the flow spec and schema in a readable format with colored output.
128+
Show the flow spec and schema in a readable format.
61129
"""
130+
_load_user_app(app_path)
131+
62132
flow = _flow_by_name(flow_name)
63133
console = Console(no_color=not color)
64134
console.print(flow._render_spec(verbose=verbose))
65135

66136
console.print()
67137
table = Table(
68138
title=f"Schema for Flow: {flow.name}",
69-
show_header=True,
139+
title_style="cyan",
70140
header_style="bold magenta"
71141
)
72142
table.add_column("Field", style="cyan")
@@ -79,11 +149,17 @@ def show(flow_name: str | None, color: bool, verbose: bool):
79149
console.print(table)
80150

81151
@cli.command()
82-
def setup():
152+
@click.option(
153+
'--app', 'app_path', required=True,
154+
type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
155+
help="Path to the Python file defining flows to set up."
156+
)
157+
def setup(app_path: str):
83158
"""
84159
Check and apply backend setup changes for flows, including the internal and target storage
85160
(to export).
86161
"""
162+
_load_user_app(app_path)
87163
setup_status = sync_setup()
88164
click.echo(setup_status)
89165
if setup_status.is_up_to_date():
@@ -95,16 +171,25 @@ def setup():
95171
apply_setup_changes(setup_status)
96172

97173
@cli.command()
174+
@click.option(
175+
'--app', 'app_path', required=False,
176+
type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
177+
help="Path to the app file (needed if not using --all or specific names)."
178+
)
98179
@click.argument("flow_name", type=str, nargs=-1)
99180
@click.option(
100181
"-a", "--all", "drop_all", is_flag=True, show_default=True, default=False,
101182
help="Drop the backend setup for all flows with persisted setup, "
102183
"even if not defined in the current process.")
103-
def drop(flow_name: tuple[str, ...], drop_all: bool):
184+
def drop(app_path: str | None, flow_name: tuple[str, ...], drop_all: bool):
104185
"""
105186
Drop the backend setup for specified flows.
106187
If no flow is specified, all flows defined in the current process will be dropped.
107188
"""
189+
if not app_path:
190+
raise click.UsageError("The --app <path> option is required when dropping flows defined in the app (and not using --all or specific flow names).")
191+
_load_user_app(app_path)
192+
108193
if drop_all:
109194
flow_names = flow_names_with_setup()
110195
elif len(flow_name) == 0:
@@ -122,17 +207,23 @@ def drop(flow_name: tuple[str, ...], drop_all: bool):
122207
apply_setup_changes(setup_status)
123208

124209
@cli.command()
210+
@click.option(
211+
'--app', 'app_path', required=True,
212+
type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
213+
help="Path to the Python file defining flows."
214+
)
125215
@click.argument("flow_name", type=str, required=False)
126216
@click.option(
127217
"-L", "--live", is_flag=True, show_default=True, default=False,
128218
help="Continuously watch changes from data sources and apply to the target index.")
129219
@click.option(
130220
"-q", "--quiet", is_flag=True, show_default=True, default=False,
131221
help="Avoid printing anything to the standard output, e.g. statistics.")
132-
def update(flow_name: str | None, live: bool, quiet: bool):
222+
def update(app_path: str, flow_name: str | None, live: bool, quiet: bool):
133223
"""
134224
Update the index to reflect the latest data from data sources.
135225
"""
226+
_load_user_app(app_path)
136227
options = flow.FlowLiveUpdaterOptions(live_mode=live, print_stats=not quiet)
137228
if flow_name is None:
138229
return flow.update_all_flows(options)
@@ -142,6 +233,11 @@ def update(flow_name: str | None, live: bool, quiet: bool):
142233
return updater.update_stats()
143234

144235
@cli.command()
236+
@click.option(
237+
'--app', 'app_path', required=True,
238+
type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
239+
help="Path to the Python file defining the flow."
240+
)
145241
@click.argument("flow_name", type=str, required=False)
146242
@click.option(
147243
"-o", "--output-dir", type=str, required=False,
@@ -151,23 +247,26 @@ def update(flow_name: str | None, live: bool, quiet: bool):
151247
help="Use already-cached intermediate data if available. "
152248
"Note that we only reuse existing cached data without updating the cache "
153249
"even if it's turned on.")
154-
def evaluate(flow_name: str | None, output_dir: str | None, cache: bool = True):
250+
def evaluate(app_path: str, flow_name: str | None, output_dir: str | None, cache: bool = True):
155251
"""
156252
Evaluate the flow and dump flow outputs to files.
157253
158254
Instead of updating the index, it dumps what should be indexed to files.
159255
Mainly used for evaluation purpose.
160256
"""
257+
_load_user_app(app_path)
161258
fl = _flow_by_name(flow_name)
162259
if output_dir is None:
163260
output_dir = f"eval_{fl.name}_{datetime.datetime.now().strftime('%y%m%d_%H%M%S')}"
164261
options = flow.EvaluateAndDumpOptions(output_dir=output_dir, use_cache=cache)
165262
fl.evaluate_and_dump(options)
166263

167-
# Create ServerSettings lazily upon first call, as environment variables may be loaded from files, etc.
168-
COCOINDEX_HOST = 'https://cocoindex.io'
169-
170264
@cli.command()
265+
@click.option(
266+
"--app", "app_path", required=True,
267+
type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
268+
help="Path to the Python file defining flows and handlers."
269+
)
171270
@click.option(
172271
"-a", "--address", type=str,
173272
help="The address to bind the server to, in the format of IP:PORT. "
@@ -190,13 +289,16 @@ def evaluate(flow_name: str | None, output_dir: str | None, cache: bool = True):
190289
@click.option(
191290
"-q", "--quiet", is_flag=True, show_default=True, default=False,
192291
help="Avoid printing anything to the standard output, e.g. statistics.")
193-
def server(address: str | None, live_update: bool, quiet: bool, cors_origin: str | None,
194-
cors_cocoindex: bool, cors_local: int | None):
292+
def server(app_path: str, address: str | None, live_update: bool, quiet: bool,
293+
cors_origin: str | None, cors_cocoindex: bool, cors_local: int | None):
195294
"""
196295
Start a HTTP server providing REST APIs.
197296
198297
It will allow tools like CocoInsight to access the server.
199298
"""
299+
_load_user_app(app_path)
300+
_ensure_flows_and_handlers_built()
301+
200302
server_settings = setting.ServerSettings.from_env()
201303
cors_origins: set[str] = set(server_settings.cors_origins or [])
202304
if cors_origin is not None:
@@ -235,3 +337,6 @@ def _flow_name(name: str | None) -> str:
235337

236338
def _flow_by_name(name: str | None) -> flow.Flow:
    """Return the flow for ``name`` after resolving it through ``_flow_name``."""
    resolved_name = _flow_name(name)
    return flow.flow_by_name(resolved_name)
340+
341+
if __name__ == "__main__":
342+
cli()

python/cocoindex/lib.py

Lines changed: 23 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,10 @@
11
"""
22
Library level functions and states.
33
"""
4-
import sys
5-
import functools
6-
import inspect
4+
import warnings
5+
from typing import Callable, Any
76

8-
from typing import Callable
9-
10-
from . import _engine
11-
from . import flow, query, cli, setting
7+
from . import _engine, setting
128
from .convert import dump_engine_object
139

1410

@@ -19,59 +15,36 @@ def init(settings: setting.Settings):
1915

2016
def start_server(settings: setting.ServerSettings):
2117
"""Start the cocoindex server."""
22-
flow.ensure_all_flows_built()
23-
query.ensure_all_handlers_built()
2418
_engine.start_server(settings.__dict__)
2519

2620
def stop():
    """Stop the cocoindex library by delegating to ``_engine.stop()``."""
    _engine.stop()
2923

3024
def main_fn(
31-
settings: setting.Settings | None = None,
32-
cocoindex_cmd: str = 'cocoindex',
25+
settings: Any | None = None,
26+
cocoindex_cmd: str | None = None,
3327
) -> Callable[[Callable], Callable]:
3428
"""
35-
A decorator to wrap the main function.
36-
If the python binary is called with the given command, it yields control to the cocoindex CLI.
37-
38-
If the settings are not provided, they are loaded from the environment variables.
29+
DEPRECATED: Using @cocoindex.main_fn() is no longer supported and has no effect.
30+
This decorator will be removed in a future version, which will cause an AttributeError.
31+
Please remove it from your code and use the standalone 'cocoindex' CLI.
3932
"""
40-
41-
def _pre_init() -> None:
42-
effective_settings = settings or setting.Settings.from_env()
43-
init(effective_settings)
44-
45-
def _should_run_cli() -> bool:
46-
return len(sys.argv) > 1 and sys.argv[1] == cocoindex_cmd
47-
48-
def _run_cli():
49-
return cli.cli.main(sys.argv[2:], prog_name=f"{sys.argv[0]} {sys.argv[1]}")
33+
warnings.warn(
34+
"\n\n"
35+
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
36+
"CRITICAL DEPRECATION NOTICE from CocoIndex:\n"
37+
"The @cocoindex.main_fn() decorator found in your script is DEPRECATED and IGNORED.\n"
38+
"It provides NO functionality and will be REMOVED entirely in a future version.\n"
39+
"If not removed, your script will FAIL with an AttributeError in the future.\n\n"
40+
"ACTION REQUIRED: Please REMOVE @cocoindex.main_fn() from your Python script.\n\n"
41+
"To use CocoIndex commands, invoke the standalone 'cocoindex' CLI:\n"
42+
" cocoindex <command> [options] --app <your_script.py>\n"
43+
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n",
44+
DeprecationWarning,
45+
stacklevel=2
46+
)
5047

5148
def _main_wrapper(fn: Callable) -> Callable:
52-
if inspect.iscoroutinefunction(fn):
53-
@functools.wraps(fn)
54-
async def _inner(*args, **kwargs):
55-
_pre_init()
56-
try:
57-
if _should_run_cli():
58-
# Schedule to a separate thread as it invokes nested event loop.
59-
# return await asyncio.to_thread(_run_cli)
60-
return _run_cli()
61-
return await fn(*args, **kwargs)
62-
finally:
63-
stop()
64-
return _inner
65-
else:
66-
@functools.wraps(fn)
67-
def _inner(*args, **kwargs):
68-
_pre_init()
69-
try:
70-
if _should_run_cli():
71-
return _run_cli()
72-
return fn(*args, **kwargs)
73-
finally:
74-
stop()
75-
return _inner
76-
49+
return fn
7750
return _main_wrapper

0 commit comments

Comments
 (0)