Skip to content

Commit 6f2430a

Browse files
authored
Merge pull request #120 from WecoAI/dev
Merge Dev - Add support for observability + external optimizer
2 parents 00a8ffc + d709b25 commit 6f2430a

File tree

9 files changed

+395
-5
lines changed

9 files changed

+395
-5
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ name = "weco"
88
authors = [{ name = "Weco AI Team", email = "contact@weco.ai" }]
99
description = "Documentation for `weco`, a CLI for using Weco AI's code optimizer."
1010
readme = "README.md"
11-
version = "0.3.18"
11+
version = "0.3.19"
1212
license = { file = "LICENSE" }
1313
requires-python = ">=3.9"
1414
dependencies = [

tests/langsmith/wizard/test_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ def test_set_key_failure_clears_key(self, mock_os, mock_client_prop, wizard_serv
206206
resp, data = post_json(conn, "/api/set-key", {"key": "bad-key"})
207207
assert resp.status == 200
208208
assert data["connected"] is False
209-
assert data["error"] == "Connection failed. Check that your API key is valid."
209+
assert data["error"] == "Connection failed: Check that your API key is valid."
210210
mock_os.environ.pop.assert_called_once_with("LANGCHAIN_API_KEY", None)
211211

212212
@patch.object(WizardServer, "client", new_callable=PropertyMock)

weco/cli.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from .auth import perform_login
88
from .config import clear_api_key, load_weco_api_key
9+
from .observe.cli import configure_observe_parser, execute_observe_command
910
from .constants import DEFAULT_MODELS
1011
from .events import (
1112
send_event,
@@ -468,6 +469,14 @@ def _main() -> None:
468469
setup_parser = subparsers.add_parser("setup", help="Set up Weco for use with AI tools")
469470
configure_setup_parser(setup_parser)
470471

472+
# --- Observe Command Parser Setup ---
473+
observe_parser = subparsers.add_parser(
474+
"observe",
475+
help="Track external optimization runs (init, log, complete, fail)",
476+
formatter_class=argparse.RawTextHelpFormatter,
477+
)
478+
configure_observe_parser(observe_parser)
479+
471480
args = parser.parse_args()
472481

473482
# Create event context with via_skill flag
@@ -513,6 +522,9 @@ def _main() -> None:
513522

514523
handle_setup_command(args, console)
515524
sys.exit(0)
525+
elif args.command == "observe":
526+
execute_observe_command(args)
527+
sys.exit(0)
516528
else:
517529
# This case should be hit if 'weco' is run alone and chatbot logic didn't catch it,
518530
# or if an invalid command is provided.

weco/config.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,17 @@ def save_api_key(api_key: str):
4343

4444

4545
def load_weco_api_key() -> str | None:
46-
"""Loads the Weco API key."""
46+
"""Loads the Weco API key.
47+
48+
Resolution order:
49+
1. WECO_API_KEY environment variable
50+
2. Credentials file (~/.config/weco/credentials.json)
51+
"""
52+
# Environment variable takes precedence
53+
env_key = os.environ.get("WECO_API_KEY")
54+
if env_key:
55+
return env_key
56+
4757
if not CREDENTIALS_FILE.exists():
4858
return None
4959
try:

weco/integrations/langsmith/wizard/server.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def handle_status(self):
137137
list(self.server.client.list_datasets(limit=1))
138138
connected = True
139139
except Exception:
140+
raise
140141
pass
141142

142143
state = self.server.initial_state
@@ -179,10 +180,21 @@ def handle_set_key(self):
179180
try:
180181
list(self.server.client.list_datasets(limit=1))
181182
self.send_json({"connected": True, "error": None})
182-
except Exception:
183+
except Exception as e:
183184
os.environ.pop("LANGCHAIN_API_KEY", None)
184185
self.server.reset_client()
185-
self.send_json({"connected": False, "error": "Connection failed. Check that your API key is valid."})
186+
# Map the failure to a fixed hint message without leaking the full exception text (which may contain the key)
187+
error_type = type(e).__name__
188+
detail = ""
189+
if "401" in str(e) or "403" in str(e):
190+
detail = "API key was rejected (check it's valid and for the correct workspace)."
191+
elif "404" in str(e):
192+
detail = "LangSmith API endpoint not found (check LANGCHAIN_ENDPOINT)."
193+
elif "ConnectionError" in error_type or "timeout" in str(e).lower():
194+
detail = "Could not reach LangSmith API (check your network connection)."
195+
else:
196+
detail = "Check that your API key is valid."
197+
self.send_json({"connected": False, "error": f"Connection failed: {detail}"})
186198

187199
def handle_list_datasets(self):
188200
try:

weco/observe/__init__.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
"""Weco Observe — observability SDK for external optimization loops.
2+
3+
Usage:
4+
from weco.observe import WecoObserver
5+
6+
obs = WecoObserver()
7+
run = obs.create_run(
8+
name="val_bpb sweep v3",
9+
source_code={"train.py": open("train.py").read()},
10+
primary_metric="val_bpb",
11+
maximize=False,
12+
)
13+
14+
run.log_step(
15+
step=i,
16+
status="completed",
17+
description="Added RMSNorm",
18+
metrics={"val_bpb": 1.03, "memory_gb": 34.5},
19+
code={"train.py": open("train.py").read()},
20+
)
21+
22+
"""
23+
24+
from .observer import WecoObserver, ObserveRun
25+
26+
__all__ = ["WecoObserver", "ObserveRun"]

weco/observe/api.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""HTTP client for external run API endpoints.
2+
3+
All functions are synchronous (using requests) and never raise exceptions.
4+
On failure they emit a warning and return None, so the caller never crashes.
5+
"""
6+
7+
import warnings
8+
from typing import Any
9+
10+
import requests
11+
12+
from weco import __base_url__
13+
14+
15+
def create_run(
16+
*,
17+
source_code: dict[str, str],
18+
metric_name: str,
19+
maximize: bool,
20+
name: str | None = None,
21+
additional_instructions: str | None = None,
22+
metadata: dict[str, Any] | None = None,
23+
auth_headers: dict[str, str],
24+
) -> dict | None:
25+
"""Create an external run. Returns response dict or None on failure."""
26+
try:
27+
payload: dict[str, Any] = {"source_code": source_code, "metric_name": metric_name, "maximize": maximize}
28+
if name is not None:
29+
payload["name"] = name
30+
if additional_instructions is not None:
31+
payload["additional_instructions"] = additional_instructions
32+
if metadata:
33+
payload["metadata"] = metadata
34+
35+
response = requests.post(f"{__base_url__}/external/runs", json=payload, headers=auth_headers, timeout=(5, 30))
36+
response.raise_for_status()
37+
return response.json()
38+
except Exception as e:
39+
warnings.warn(f"weco observe: failed to create run: {e}", stacklevel=2)
40+
return None
41+
42+
43+
def log_step(
44+
*,
45+
run_id: str,
46+
step: int,
47+
status: str = "completed",
48+
description: str | None = None,
49+
metrics: dict[str, float] | None = None,
50+
code: dict[str, str] | None = None,
51+
parent_step: int | None = None,
52+
metadata: dict[str, Any] | None = None,
53+
auth_headers: dict[str, str],
54+
) -> dict | None:
55+
"""Log a step for an external run. Returns response dict or None on failure."""
56+
try:
57+
payload: dict[str, Any] = {"step": step, "status": status}
58+
if description is not None:
59+
payload["description"] = description
60+
if metrics:
61+
payload["metrics"] = metrics
62+
if code is not None:
63+
payload["code"] = code
64+
if parent_step is not None:
65+
payload["parent_step"] = parent_step
66+
if metadata:
67+
payload["metadata"] = metadata
68+
69+
response = requests.post(
70+
f"{__base_url__}/external/runs/{run_id}/steps", json=payload, headers=auth_headers, timeout=(5, 30)
71+
)
72+
response.raise_for_status()
73+
return response.json()
74+
except Exception as e:
75+
warnings.warn(f"weco observe: failed to log step {step}: {e}", stacklevel=2)
76+
return None

weco/observe/cli.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
"""CLI commands for weco observe.
2+
3+
All commands follow the fire-and-forget pattern: they print warnings to
4+
stderr on failure but always exit 0 so they never crash an agent's loop.
5+
"""
6+
7+
import argparse
8+
import json
9+
import sys
10+
import warnings
11+
12+
from weco.auth import handle_authentication
13+
from weco.observe import api
14+
15+
16+
def configure_observe_parser(observe_parser: argparse.ArgumentParser) -> None:
17+
"""Configure the observe command parser and all its subcommands."""
18+
subparsers = observe_parser.add_subparsers(dest="observe_command", help="Observe commands")
19+
20+
# --- init ---
21+
init_parser = subparsers.add_parser("init", help="Initialize an external run for tracking")
22+
init_parser.add_argument("--name", type=str, default=None, help="Run name")
23+
init_parser.add_argument("--metric", type=str, required=True, help="Primary metric name (e.g. val_bpb)")
24+
init_parser.add_argument(
25+
"-g",
26+
"--goal",
27+
type=str,
28+
choices=["maximize", "max", "minimize", "min"],
29+
default="minimize",
30+
help="Specify 'maximize'/'max' or 'minimize'/'min' (default: minimize)",
31+
)
32+
init_source_group = init_parser.add_mutually_exclusive_group(required=True)
33+
init_source_group.add_argument(
34+
"-s", "--source", type=str, help="Path to a single source code file to track (e.g. train.py)"
35+
)
36+
init_source_group.add_argument(
37+
"--sources", nargs="+", type=str, help="Paths to multiple source code files to track (e.g. train.py prepare.py)"
38+
)
39+
init_parser.add_argument(
40+
"-i", "--additional-instructions", type=str, default=None, help="Additional instructions for the run"
41+
)
42+
43+
# --- log ---
44+
log_parser = subparsers.add_parser("log", help="Log a step for an external run")
45+
log_parser.add_argument("--run-id", type=str, required=True, help="Run ID (from weco observe init)")
46+
log_parser.add_argument("--step", type=int, required=True, help="Step number")
47+
log_parser.add_argument(
48+
"--status", type=str, default="completed", choices=["completed", "failed"], help="Step status (default: completed)"
49+
)
50+
log_parser.add_argument("--description", type=str, default=None, help="Description of what was tried")
51+
log_parser.add_argument("--metrics", type=str, default=None, help="Metrics as JSON (e.g. '{\"val_bpb\": 1.03}')")
52+
log_source_group = log_parser.add_mutually_exclusive_group()
53+
log_source_group.add_argument("-s", "--source", type=str, default=None, help="Single source code file to snapshot")
54+
log_source_group.add_argument(
55+
"--sources", nargs="+", type=str, default=None, help="Multiple source code files to snapshot"
56+
)
57+
log_parser.add_argument("--parent-step", type=int, default=None, help="Parent step number for tree lineage")
58+
59+
# --- complete/fail are no longer needed ---
60+
# External run lifecycle is managed by the dashboard, not the CLI.
61+
# Logging a step to a closed run will silently reopen it.
62+
63+
64+
def _read_code_files(paths: list[str]) -> dict[str, str]:
65+
"""Read source code files from disk."""
66+
source_code = {}
67+
for path in paths:
68+
try:
69+
with open(path) as f:
70+
source_code[path] = f.read()
71+
except FileNotFoundError:
72+
warnings.warn(f"weco observe: file not found: {path}", stacklevel=2)
73+
except Exception as e:
74+
warnings.warn(f"weco observe: error reading {path}: {e}", stacklevel=2)
75+
return source_code
76+
77+
78+
def execute_observe_command(args: argparse.Namespace) -> None:
79+
"""Execute an observe subcommand. Always exits 0."""
80+
if not args.observe_command:
81+
print("Usage: weco observe {init,log,complete,fail}", file=sys.stderr)
82+
sys.exit(0)
83+
84+
# Authenticate
85+
try:
86+
_, auth_headers = handle_authentication(None)
87+
if not auth_headers:
88+
print("weco observe: not logged in. Run `weco login` first.", file=sys.stderr)
89+
sys.exit(0)
90+
except Exception as e:
91+
print(f"weco observe: authentication failed: {e}", file=sys.stderr)
92+
sys.exit(0)
93+
94+
if args.observe_command == "init":
95+
_handle_init(args, auth_headers)
96+
elif args.observe_command == "log":
97+
_handle_log(args, auth_headers)
98+
99+
100+
def _handle_init(args: argparse.Namespace, auth_headers: dict) -> None:
101+
"""Handle `weco observe init`."""
102+
source_arg = args.sources if args.sources is not None else [args.source]
103+
source_code = _read_code_files(source_arg)
104+
if not source_code:
105+
print("weco observe: no source files could be read", file=sys.stderr)
106+
sys.exit(0)
107+
108+
maximize = args.goal in ("maximize", "max")
109+
110+
result = api.create_run(
111+
source_code=source_code,
112+
metric_name=args.metric,
113+
maximize=maximize,
114+
name=args.name,
115+
additional_instructions=args.additional_instructions,
116+
auth_headers=auth_headers,
117+
)
118+
119+
if result and result.get("run_id"):
120+
# Print only the run_id to stdout so it can be captured by $(...)
121+
print(result["run_id"])
122+
else:
123+
print("weco observe: failed to create run", file=sys.stderr)
124+
125+
126+
def _handle_log(args: argparse.Namespace, auth_headers: dict) -> None:
127+
"""Handle `weco observe log`."""
128+
# Parse metrics JSON
129+
metrics = {}
130+
if args.metrics:
131+
try:
132+
metrics = json.loads(args.metrics)
133+
except json.JSONDecodeError as e:
134+
print(f"weco observe: invalid metrics JSON: {e}", file=sys.stderr)
135+
sys.exit(0)
136+
137+
# Read source files if specified
138+
code = None
139+
source_arg = args.sources if args.sources is not None else ([args.source] if args.source else None)
140+
if source_arg:
141+
code = _read_code_files(source_arg)
142+
143+
api.log_step(
144+
run_id=args.run_id,
145+
step=args.step,
146+
status=args.status,
147+
description=args.description,
148+
metrics=metrics,
149+
code=code,
150+
parent_step=args.parent_step,
151+
auth_headers=auth_headers,
152+
)

0 commit comments

Comments
 (0)