Skip to content

Commit b63789c

Browse files
authored
[cli] add support for automatically downloading and running dataflow-webui (#451)
* [cli] add support for automatically downloading the webui * [cli] add an automatic request to check when to start the browser
1 parent ef62020 commit b63789c

File tree

2 files changed

+240
-11
lines changed

2 files changed

+240
-11
lines changed

dataflow/cli.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,6 @@
2727
ADAPTER_FILES = {"adapter_config.json", "adapter_model.bin", "adapter_model.safetensors"}
2828
BASE_MODEL_FILES = {"config.json", "pytorch_model.bin", "model.safetensors", "tokenizer.json", "tokenizer_config.json"}
2929

30-
31-
32-
3330
def check_updates() -> None:
3431
"""Print version and try to query PyPI for newer version (best-effort)."""
3532
cols = 80
@@ -380,13 +377,19 @@ def text2model_train(input_dir: Path = typer.Argument(Path("."), help="Input dir
380377

381378

382379
@app.command()
383-
# Removed side of this diff: the former placeholder command. It launches
# nothing and only prints one of two notices, chosen by `mode`.
def webui(mode: str = typer.Option("operators", help="operators | agent | pdf")):
384-
"""Launch (placeholder) WebUI modes."""
385-
if mode in {"operators", "pdf"}:
386-
_echo("WebUI is under maintenance; check back later.", "yellow")
387-
else:
388-
_echo("Agent UI deprecated; see DataFlow-Agent repo.", "yellow")
389-
390-
380+
def webui(
    zippath: Optional[Path] = typer.Option(None, "--zippath", help="Use a local release zip instead of downloading."),
    host: str = typer.Option("0.0.0.0", "--host", help="Host to bind (default: 0.0.0.0)"),
    port: int = typer.Option(8000, "--port", help="Port to bind (default: 8000)"),
):
    """Download latest WebUI release zip and run it."""
    # NOTE: the docstring above is left as a single line on purpose; it is
    # presumably what the CLI framework shows as this command's help text.
    #
    # Exit status contract:
    #   * SystemExit raised below keeps its own status (None -> 0, int -> as-is,
    #     any other payload is printed and mapped to 1, as the interpreter does).
    #   * Any other exception is reported and mapped to status 1.
    try:
        # Imported lazily so the rest of the CLI does not pay for (or depend on)
        # the WebUI helper module unless this command is actually invoked.
        from dataflow.cli_funcs.cli_webui import cli_webui  # type: ignore
        cli_webui(zippath=zippath, host=host, port=port)
    except SystemExit as exc:
        status = exc.code
        if status is None:
            exit_code = 0
        elif isinstance(status, int):
            exit_code = status
        else:
            _echo(str(status), "red")
            exit_code = 1
        raise typer.Exit(code=exit_code)
    except Exception as e:
        _echo(f"webui error: {e}", "red")
        raise typer.Exit(code=1)
391394
if __name__ == "__main__":
392395
app()

dataflow/cli_funcs/cli_webui.py

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
from __future__ import annotations
2+
3+
import os
4+
import shutil
5+
import zipfile
6+
from pathlib import Path
7+
from typing import Optional, Tuple
8+
import webbrowser
9+
import requests
10+
import threading
11+
import time
12+
13+
from dataflow.cli_funcs.utils import _echo
14+
15+
# GitHub coordinates of the WebUI project; both URLs below are built from them.
# LATEST_API: GitHub API URL ending in /releases/latest, queried for release metadata.
# REPO_URL:   browser-facing repository address, shown to the user in the confirmation notice.
OWNER = "OpenDCAI"
16+
REPO = "DataFlow-WebUI"
17+
LATEST_API = f"https://api.github.com/repos/{OWNER}/{REPO}/releases/latest"
18+
REPO_URL = f"https://github.com/{OWNER}/{REPO}"
19+
20+
21+
def _ask_yes(prompt: str, default_no: bool = True) -> bool:
    """Ask a yes/no question on stdin and return the answer as a bool.

    The prompt is followed by " [y/N]: " when the default is "no" and by
    " [Y/n]: " when the default is "yes". An empty reply selects the default;
    otherwise only "y" / "yes" (case-insensitive, surrounding whitespace
    ignored) count as agreement.
    """
    if default_no:
        hint = " [y/N]: "
    else:
        hint = " [Y/n]: "

    reply = input(prompt + hint)
    reply = reply.strip().lower()

    if reply == "":
        # Nothing typed: fall back to the advertised default.
        return not default_no
    return reply == "y" or reply == "yes"
27+
28+
29+
def _confirm_yes() -> None:
    """Tell the user what is about to run and stop unless they agree.

    Raises:
        SystemExit: with status 0 when the user declines.
    """
    notice = f"This will run DataFlow-WebUI ({REPO_URL}) by using a GitHub Release zip."
    _echo(notice, "yellow")

    # default_no=False: pressing Enter alone counts as agreement.
    agreed = _ask_yes("Do you confirm to continue?", default_no=False)
    if agreed:
        return
    raise SystemExit(0)
34+
35+
36+
def _pick_zip(release: dict) -> Tuple[str, str, str]:
    """Select the WebUI zip asset from a decoded release payload.

    Args:
        release: mapping with an optional "tag_name" and an "assets" list whose
            items carry "name" and "browser_download_url".

    Returns:
        (tag, asset_name, download_url) for the first asset whose name matches
        "DataFlow-WebUI-*.zip" and that has a download URL. The tag falls back
        to "latest" when the payload has none.

    Raises:
        RuntimeError: when no matching asset is present.
    """
    tag = release.get("tag_name") or "latest"
    # A JSON null decodes to None, and dict.get(key, default) returns that None
    # rather than the default, so every field is normalised with `or`.
    for asset in release.get("assets") or []:
        name = asset.get("name") or ""
        url = asset.get("browser_download_url") or ""
        if url and name.startswith("DataFlow-WebUI-") and name.endswith(".zip"):
            return tag, name, url
    raise RuntimeError("No DataFlow-WebUI-*.zip found in latest release.")
44+
45+
46+
def _ask_base_dir(default: Path) -> Path:
    """Prompt for the download directory and return it as a resolved path.

    An empty reply selects ``default``. A typed path has ``~`` expanded before
    it is resolved.
    """
    _echo(f"Default download directory: {default}", "cyan")
    typed = input(f"Download directory [{default}]: ").strip()
    if not typed:
        return default.resolve()
    chosen = Path(typed).expanduser()
    return chosen.resolve()
50+
51+
def _open_browser(url: str) -> None:
    """Try to open ``url`` in the browser and report the outcome via ``_echo``.

    Never raises for ordinary errors: if the browser cannot be opened (either
    ``webbrowser.open`` returns a falsy value or something raises), the user is
    asked to open the URL manually instead.
    """
    manual_hint = f"Please open in browser: {url}"
    try:
        # new=2: new tab if possible
        opened = webbrowser.open(url, new=2)
        if not opened:
            _echo(manual_hint, "cyan")
        else:
            _echo(f"Opened browser: {url}", "green")
    except Exception:
        _echo(manual_hint, "cyan")
60+
61+
def _wait_open_browser_async(host: str, port: int, path: str = "/ui/", timeout_s: int = 60) -> None:
    """
    Start a daemon thread that opens the browser once the server answers.

    - poll http://{host}:{port}/ every 1s (a 0.0.0.0 bind host is visited as 127.0.0.1)
    - within timeout_s, once any HTTP response is received, open
      http://{host}:{port}{path} in the browser and report via _echo
    - if timeout, report startup failure (the server process itself is not
      touched from this thread)

    Returns immediately; the polling happens in the background thread.
    """
    # 0.0.0.0 is only meaningful as a bind address; use 127.0.0.1 to reach the
    # server from this machine.
    visit_host = "127.0.0.1" if host in {"0.0.0.0", "0.0.0.0/0"} else host
    url = f"http://{visit_host}:{port}/"
    # Honour the caller-supplied path instead of a hard-coded "/ui/".
    ui_url = url + path.lstrip("/")

    def _worker() -> None:
        _echo(f"[webui] Waiting for server... {url} (timeout={timeout_s}s)", "cyan")
        # Monotonic clock: the timeout must not be affected by wall-clock jumps.
        deadline = time.monotonic() + timeout_s
        while time.monotonic() < deadline:
            try:
                # Any HTTP response at all (whatever the status) means "ready".
                r = requests.get(url, timeout=0.8)
            except (requests.RequestException, OSError):
                time.sleep(1)
                continue

            _echo(f"[webui] Server is up ({r.status_code}). Opening browser: {ui_url}", "green")
            try:
                opened = webbrowser.open(ui_url, new=2)
            except Exception as e:
                _echo(f"[webui] Failed to open browser automatically: {e}", "yellow")
                opened = False
            if not opened:
                # webbrowser.open signals most failures by returning False.
                _echo(f"[webui] Please open manually: {ui_url}", "cyan")
            return

        _echo(f"[webui] Timeout after {timeout_s}s — server did not respond at {url}", "red")
        _echo("[webui] Startup may have failed (or is still starting). Check terminal logs above.", "yellow")

    t = threading.Thread(target=_worker, daemon=True)
    t.start()
97+
98+
99+
def _download_with_progress(url: str, dst: Path) -> None:
    """Stream ``url`` into ``dst`` while printing a one-line progress meter.

    The data is written to a sibling ``<name>.part`` file and renamed onto
    ``dst`` only after the transfer completed, so an interrupted or failed
    download never leaves a truncated file at ``dst``.

    Args:
        url: address to download from.
        dst: final file path; missing parent directories are created.

    Raises:
        RuntimeError: on any download or write error (original error chained).
    """
    partial = dst.with_name(dst.name + ".part")
    try:
        dst.parent.mkdir(parents=True, exist_ok=True)
        with requests.get(url, stream=True, timeout=120) as r:
            r.raise_for_status()
            # 0 means "size unknown": the meter is then skipped entirely.
            total = int(r.headers.get("Content-Length", 0))
            downloaded = 0
            last_percent = -1

            with open(partial, "wb") as f:
                for chunk in r.iter_content(chunk_size=100 * 1024):
                    if not chunk:
                        continue
                    f.write(chunk)
                    downloaded += len(chunk)

                    if total > 0:
                        percent = int(downloaded * 100 / total)
                        # Redraw only when the integer percentage changes.
                        if percent != last_percent:
                            mb_done = downloaded / (1024 * 1024)
                            mb_total = total / (1024 * 1024)
                            print(
                                f"\rDownloading: {percent:3d}% "
                                f"({mb_done:.1f}/{mb_total:.1f} MB)",
                                end="",
                                flush=True,
                            )
                            last_percent = percent

            if total > 0:
                print()  # finish the progress line

        # Publish the finished file in one step.
        partial.replace(dst)
    except Exception as e:
        partial.unlink(missing_ok=True)
        raise RuntimeError(f"Download failed: {e}, please manually fetch it from {url}") from e
    except BaseException:
        # Ctrl-C / interpreter exit: clean up, then let it propagate untouched.
        partial.unlink(missing_ok=True)
        raise
135+
136+
137+
def _resolve_release_zip(zippath: Optional[Path], downloads: Path) -> Tuple[str, Path]:
    """Return ``(tag, zip_path)``: the local zip if given, else the latest release zip."""
    if zippath:
        zip_path = Path(zippath).expanduser().resolve()
        if not zip_path.is_file():
            raise RuntimeError(f"zippath not found: {zip_path}")
        _echo(f"Using local zip: {zip_path}", "green")
        return "local", zip_path

    r = requests.get(LATEST_API, headers={"Accept": "application/vnd.github+json"}, timeout=20)
    r.raise_for_status()
    tag, name, url = _pick_zip(r.json())
    zip_path = downloads / name

    # Check for a previously downloaded zip before deciding whether to download.
    if zip_path.exists() and zip_path.stat().st_size > 0:
        _echo(f"Found existing zip: {zip_path}", "yellow")
        if _ask_yes("Overwrite and re-download this zip?", default_no=True):
            _echo(f"Re-downloading → {zip_path}", "cyan")
            zip_path.unlink(missing_ok=True)
            _echo(f"Downloading: {name}", "cyan")
            _download_with_progress(url, zip_path)
        else:
            _echo("Using existing zip (skip download).", "green")
    else:
        _echo(f"Will download: {name}", "cyan")
        _echo(f"Download to : {zip_path}", "cyan")
        _download_with_progress(url, zip_path)
    return tag, zip_path


def _extract_release(zip_path: Path, extract_dir: Path) -> None:
    """Unpack ``zip_path`` into ``extract_dir``, creating the directory if needed."""
    _echo(f"Extracting → {extract_dir}", "cyan")
    extract_dir.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(extract_dir)


def _run_python_module(module_args: list[str], cwd: Path) -> int:
    """Run ``<current interpreter> -m <module_args...>`` in ``cwd``; return its exit status."""
    import subprocess
    import sys

    # Argument list + cwd instead of a shell string: no quoting problems with
    # unusual paths, and nothing user-supplied is ever parsed by a shell.
    proc = subprocess.Popen([sys.executable, "-m", *module_args], cwd=str(cwd))
    try:
        return proc.wait()
    except KeyboardInterrupt:
        # Ctrl-C normally reaches the child too; give it a moment to shut down
        # on its own before forcing the issue, then let the interrupt propagate.
        try:
            proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()
        raise


def cli_webui(
    zippath: Optional[Path] = None,
    host: str = "0.0.0.0",
    port: int = 8000,
) -> None:
    """Fetch (or reuse) a WebUI release, install its backend requirements and serve it.

    Interactive: asks for confirmation, for the base directory, and before
    overwriting an existing download or extraction. Blocks until the server
    process ends.

    Args:
        zippath: local release zip to use instead of querying the latest release.
        host: address passed to the server as ``--host``.
        port: port passed to the server as ``--port``.

    Raises:
        SystemExit: when the user declines the initial confirmation.
        RuntimeError: when ``zippath`` is not a file, no suitable release asset
            exists, the download fails, or the archive has no ``backend/``.
    """
    _confirm_yes()

    # 1) Choose the base dir (default: <cwd>/dataflow_webui).
    base_dir = _ask_base_dir(Path.cwd() / "dataflow_webui")
    downloads = base_dir / "downloads"
    releases = base_dir / "releases"
    downloads.mkdir(parents=True, exist_ok=True)
    releases.mkdir(parents=True, exist_ok=True)

    _echo(f"Base directory: {base_dir}", "green")

    # 2) Settle on the zip (local file or latest release).
    tag, zip_path = _resolve_release_zip(zippath, downloads)

    # 3) Extract (again: detect an existing copy first, then ask before overwriting).
    extract_dir = releases / tag
    if not extract_dir.exists():
        _extract_release(zip_path, extract_dir)
    else:
        _echo(f"Found existing extracted dir: {extract_dir}", "yellow")
        if _ask_yes("Overwrite and re-extract?", default_no=True):
            shutil.rmtree(extract_dir)
            _extract_release(zip_path, extract_dir)
        else:
            _echo("Using existing extracted files (skip extract).", "green")

    # 4) Locate the backend: descend into the archive's single top-level
    #    directory if there is exactly one, otherwise use the extract dir itself.
    dirs = [p for p in extract_dir.iterdir() if p.is_dir()]
    root = dirs[0] if len(dirs) == 1 else extract_dir
    backend = root / "backend"
    if not backend.exists():
        raise RuntimeError("backend/ directory not found after extraction.")

    _echo(f"Backend directory: {backend}", "green")

    # 5) Install dependencies (into the current environment) and run.
    _echo("Installing backend requirements into current Python environment...", "cyan")
    pip_status = _run_python_module(["pip", "install", "-r", "requirements.txt"], backend)
    if pip_status != 0:
        # Still best-effort (the server may start anyway), but no longer silent.
        _echo(f"pip install exited with status {pip_status}; the WebUI may fail to start.", "yellow")

    _echo(f"Starting WebUI at http://{host}:{port}/ui/", "green")
    _wait_open_browser_async(host, port, path="/ui/", timeout_s=60)
    try:
        _run_python_module(
            [
                "uvicorn", "app.main:app",
                "--reload", "--reload-dir", "app",
                "--host", host, "--port", str(port),
            ],
            backend,
        )
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server: return quietly.
        pass

0 commit comments

Comments
 (0)