Skip to content

Commit 81650de

Browse files
Merge pull request #70 from Annotation-Garden/develop
Release v0.6.4: User ID caching and telemetry infrastructure
2 parents 9ee9fcd + 30da556 commit 81650de

File tree

17 files changed

+1701
-33
lines changed

17 files changed

+1701
-33
lines changed

.github/workflows/publish.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
name: Publish to PyPI
22

33
on:
4+
push:
5+
tags:
6+
- 'v*'
47
release:
58
types: [published]
69
workflow_dispatch:
@@ -71,7 +74,8 @@ jobs:
7174
name: Publish to PyPI
7275
needs: build
7376
runs-on: ubuntu-latest
74-
if: github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && github.event.inputs.test_pypi != 'true')
77+
# Publish on: tag push, release published, or manual dispatch (unless test_pypi is true)
78+
if: github.event_name == 'push' || github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && github.event.inputs.test_pypi != 'true')
7579
environment:
7680
name: pypi
7781
url: https://pypi.org/p/hedit

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "hedit"
7-
version = "0.6.3a3"
7+
version = "0.6.4a0"
88
description = "Multi-agent system for HED annotation generation and validation"
99
readme = "PKG_README.md"
1010
requires-python = ">=3.12"

src/api/main.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"""
66

77
import asyncio
8+
import hashlib
89
import json
910
import os
1011
import time
@@ -48,6 +49,26 @@
4849
_byok_config: dict = {}
4950

5051

52+
def _derive_user_id(api_key: str) -> str:
    """Map an API key to a short, stable, anonymous identifier.

    The identifier is the first 16 hex characters of the SHA-256 digest of
    the key, so the same key always produces the same ID and the key itself
    is never used as the identifier. Callers pass the result as ``user_id``
    when building OpenRouter LLM clients so that each key is routed to its
    own cache lane.

    Note: This is NOT password hashing. The API key is already a strong
    secret; SHA-256 is used only for fast, deterministic ID derivation.

    Args:
        api_key: OpenRouter API key (already a secret, not a password).

    Returns:
        16-character lowercase hexadecimal user ID.
    """
    key_bytes = api_key.encode()
    # CodeQL [py/weak-cryptographic-algorithm]: Not password hashing - deriving cache ID from API key
    digest_hex = hashlib.sha256(key_bytes).hexdigest()
    return digest_hex[:16]
70+
71+
5172
def create_byok_workflow(
5273
openrouter_key: str,
5374
model: str | None = None,
@@ -101,24 +122,30 @@ def create_byok_workflow(
101122
evaluation_model = get_model_name(model if model else default_evaluation_model)
102123
assessment_model = get_model_name(model if model else default_assessment_model)
103124

125+
# Derive user ID for cache optimization (each API key gets own cache lane)
126+
user_id = _derive_user_id(openrouter_key)
127+
104128
# Create LLMs with user's key and settings
105129
annotation_llm = create_openrouter_llm(
106130
model=annotation_model,
107131
api_key=openrouter_key,
108132
temperature=llm_temperature,
109133
provider=provider_preference,
134+
user_id=user_id,
110135
)
111136
evaluation_llm = create_openrouter_llm(
112137
model=evaluation_model,
113138
api_key=openrouter_key,
114139
temperature=llm_temperature,
115140
provider=provider_preference,
141+
user_id=user_id,
116142
)
117143
assessment_llm = create_openrouter_llm(
118144
model=assessment_model,
119145
api_key=openrouter_key,
120146
temperature=llm_temperature,
121147
provider=provider_preference,
148+
user_id=user_id,
122149
)
123150

124151
# Create and return workflow
@@ -167,11 +194,15 @@ def create_byok_vision_agent(
167194
else:
168195
actual_provider = default_vision_provider
169196

197+
# Derive user ID for cache optimization
198+
user_id = _derive_user_id(openrouter_key)
199+
170200
vision_llm = create_openrouter_llm(
171201
model=actual_model,
172202
api_key=openrouter_key,
173203
temperature=actual_temperature,
174204
provider=actual_provider,
205+
user_id=user_id,
175206
)
176207

177208
return VisionAgent(llm=vision_llm)

src/cli/config.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"""
66

77
import os
8+
import uuid
89
from pathlib import Path
910
from typing import Any
1011

@@ -25,6 +26,8 @@
2526

2627
CONFIG_FILE = CONFIG_DIR / "config.yaml"
2728
CREDENTIALS_FILE = CONFIG_DIR / "credentials.yaml"
29+
MACHINE_ID_FILE = CONFIG_DIR / "machine_id"
30+
FIRST_RUN_FILE = CONFIG_DIR / ".first_run"
2831

2932
# Default API endpoint
3033
DEFAULT_API_URL = "https://api.annotation.garden/hedit"
@@ -82,6 +85,16 @@ class APIConfig(BaseModel):
8285
url: str = Field(default=DEFAULT_API_URL, description="API endpoint URL")
8386

8487

88+
class TelemetryConfig(BaseModel):
    """Telemetry configuration."""

    # Opt-out switch: telemetry is on by default and stays on until the user
    # sets this to False.
    enabled: bool = Field(default=True, description="Enable telemetry collection")
    # Model names whose runs are excluded from telemetry. The factory builds a
    # fresh list per instance (no shared mutable default) seeded with
    # DEFAULT_MODEL only.
    # NOTE(review): some pydantic v2 releases warn about field names starting
    # with "model_" (protected namespace) - confirm against the pinned version.
    model_blacklist: list[str] = Field(
        default_factory=lambda: [DEFAULT_MODEL],
        description="Models to exclude from telemetry",
    )
96+
97+
8598
class CLIConfig(BaseModel):
8699
"""Complete CLI configuration."""
87100

@@ -90,6 +103,7 @@ class CLIConfig(BaseModel):
90103
settings: SettingsConfig = Field(default_factory=SettingsConfig)
91104
output: OutputConfig = Field(default_factory=OutputConfig)
92105
execution: ExecutionMode = Field(default_factory=ExecutionMode)
106+
telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig)
93107

94108

95109
def ensure_config_dir() -> None:
@@ -279,10 +293,68 @@ def clear_credentials() -> None:
279293
CREDENTIALS_FILE.unlink()
280294

281295

296+
def get_machine_id() -> str:
    """Return this machine's persistent ID, creating it on first use.

    The ID is sent to OpenRouter as the user identifier for sticky cache
    routing (to reduce costs). It is NOT used for telemetry and is never
    transmitted except to OpenRouter.

    The value lives in ``MACHINE_ID_FILE`` inside the config directory, so it
    survives package upgrades. A missing, unreadable, or malformed file is
    replaced by a freshly generated ID. Failure to persist the new ID is
    tolerated: the ID is still returned for the current session.

    Returns:
        16-character lowercase hexadecimal machine ID.
    """
    ensure_config_dir()

    # Try to reuse a previously stored ID.
    stored = None
    if MACHINE_ID_FILE.exists():
        try:
            stored = MACHINE_ID_FILE.read_text().strip()
        except (OSError, UnicodeDecodeError):
            stored = None  # Unreadable or corrupted file: fall through and regenerate

    # Accept the stored value only if it is exactly 16 lowercase hex characters.
    if stored is not None and len(stored) == 16 and set(stored) <= set("0123456789abcdef"):
        return stored

    fresh_id = uuid.uuid4().hex[:16]

    try:
        MACHINE_ID_FILE.write_text(fresh_id)
    except OSError:
        # Could not persist; the ID is still usable for this session.
        return fresh_id

    # Restrict the file to the current user where the platform allows it.
    try:
        os.chmod(MACHINE_ID_FILE, 0o600)
    except (OSError, AttributeError):
        pass  # Windows doesn't support chmod the same way

    return fresh_id
333+
334+
335+
def is_first_run() -> bool:
    """Report whether HEDit has not yet recorded a completed first run.

    The check is purely the presence of the ``FIRST_RUN_FILE`` marker.

    Returns:
        True while the marker file is absent, False once it exists.
    """
    marker_present = FIRST_RUN_FILE.exists()
    return not marker_present
342+
343+
344+
def mark_first_run_complete() -> None:
    """Record that the first run has happened by creating the marker file.

    The config directory is ensured first. Creating the marker is best
    effort: an ``OSError`` while touching ``FIRST_RUN_FILE`` is ignored.
    """
    ensure_config_dir()
    try:
        FIRST_RUN_FILE.touch()
    except OSError:
        # Ignore write errors; nothing else to do here.
        return
351+
352+
282353
def get_config_paths() -> dict[str, Path]:
283354
"""Get paths to config files for debugging."""
284355
return {
285356
"config_dir": CONFIG_DIR,
286357
"config_file": CONFIG_FILE,
287358
"credentials_file": CREDENTIALS_FILE,
359+
"machine_id_file": MACHINE_ID_FILE,
288360
}

src/cli/local_executor.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,14 +135,19 @@ def _get_workflow(self) -> HedAnnotationWorkflow:
135135
self._ensure_api_key()
136136

137137
from src.agents.workflow import HedAnnotationWorkflow
138+
from src.cli.config import get_machine_id
138139
from src.utils.openrouter_llm import create_openrouter_llm
139140

141+
# Get machine ID for cache optimization
142+
user_id = get_machine_id()
143+
140144
# Create LLMs with user's key
141145
annotation_llm = create_openrouter_llm(
142146
model=self._model,
143147
api_key=self._api_key,
144148
temperature=self._temperature,
145149
provider=self._provider,
150+
user_id=user_id,
146151
)
147152

148153
# Use same settings for all agents in standalone mode
@@ -164,13 +169,18 @@ def _get_vision_agent(self) -> VisionAgent:
164169
self._ensure_api_key()
165170

166171
from src.agents.vision_agent import VisionAgent
172+
from src.cli.config import get_machine_id
167173
from src.utils.openrouter_llm import create_openrouter_llm
168174

175+
# Get machine ID for cache optimization
176+
user_id = get_machine_id()
177+
169178
vision_llm = create_openrouter_llm(
170179
model=self._vision_model,
171180
api_key=self._api_key,
172181
temperature=0.3, # Slightly higher for vision tasks
173182
provider=self._provider,
183+
user_id=user_id,
174184
)
175185

176186
self._vision_agent = VisionAgent(llm=vision_llm)

src/cli/main.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
clear_credentials,
2222
get_config_paths,
2323
get_effective_config,
24+
is_first_run,
2425
load_config,
2526
load_credentials,
27+
mark_first_run_complete,
2628
save_config,
2729
save_credentials,
2830
update_config,
@@ -72,7 +74,7 @@
7274
typer.Option(
7375
"--output",
7476
"-o",
75-
help="Output format",
77+
help="Output format: 'text' (human-readable) or 'json' (machine-readable)",
7678
),
7779
]
7880

@@ -274,6 +276,11 @@ def init(
274276
hedit init --api-key YOUR_KEY # API mode (default)
275277
hedit init --api-key YOUR_KEY --standalone # Standalone mode
276278
"""
279+
# Show telemetry disclosure on first run
280+
if is_first_run():
281+
show_telemetry_disclosure()
282+
mark_first_run_complete()
283+
277284
# Load existing config
278285
config = load_config()
279286
creds = load_credentials()
@@ -377,6 +384,11 @@ def annotate(
377384
hedit annotate "..." --model gpt-4o-mini --temperature 0.2
378385
hedit annotate "..." --standalone # Run locally
379386
"""
387+
# Show telemetry disclosure on first run
388+
if is_first_run():
389+
show_telemetry_disclosure()
390+
mark_first_run_complete()
391+
380392
# Determine mode override
381393
mode_override = None
382394
if standalone:
@@ -476,6 +488,11 @@ def annotate_image(
476488
hedit annotate-image screen.png -o json > result.json
477489
hedit annotate-image stimulus.png --standalone # Run locally
478490
"""
491+
# Show telemetry disclosure on first run
492+
if is_first_run():
493+
show_telemetry_disclosure()
494+
mark_first_run_complete()
495+
479496
# Validate image exists
480497
if not image.exists():
481498
output.print_error(f"Image file not found: {image}")
@@ -555,6 +572,11 @@ def validate(
555572
hedit validate "Event" -o json
556573
hedit validate "Event" --standalone # Validate locally with hedtools
557574
"""
575+
# Show telemetry disclosure on first run
576+
if is_first_run():
577+
show_telemetry_disclosure()
578+
mark_first_run_complete()
579+
558580
# Determine mode override
559581
mode_override = None
560582
if standalone:
@@ -744,6 +766,36 @@ def health(
744766
raise typer.Exit(1) from None
745767

746768

769+
def show_telemetry_disclosure() -> None:
    """Display the first-run telemetry disclosure notice.

    The notice is rendered as a Rich panel on **stderr** rather than on the
    shared console. The commands that trigger it support machine-readable
    output (e.g. ``hedit annotate-image screen.png -o json > result.json``),
    and an informational banner must not be mixed into redirected stdout.
    On an interactive terminal the notice is still shown as before.
    """
    from rich.console import Console
    from rich.panel import Panel

    disclosure_text = (
        "[bold]Welcome to HEDit![/]\n\n"
        "HEDit collects anonymous usage data to improve the annotation service:\n"
        "  • Input descriptions and generated annotations\n"
        "  • Model performance metrics (latency, iterations)\n"
        "  • Validation results\n\n"
        "[dim]What is NOT collected:[/]\n"
        "  • API keys or credentials\n"
        "  • Personal information\n"
        "  • File paths or system details\n\n"
        "[bold cyan]To disable:[/] hedit config set telemetry.enabled false\n"
        "[bold cyan]To view config:[/] hedit config show"
    )

    panel = Panel(
        disclosure_text,
        title="[bold]Privacy & Data Collection[/]",
        border_style="cyan",
        padding=(1, 2),
    )

    # Dedicated stderr console keeps stdout clean for `-o json` pipelines.
    notice_console = Console(stderr=True)
    notice_console.print()
    notice_console.print(panel)
    notice_console.print()
797+
798+
747799
def cli() -> None:
748800
"""Entry point for CLI."""
749801
app()

src/telemetry/__init__.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
"""Telemetry module for HEDit.
2+
3+
Provides opt-out telemetry collection for service improvement and model fine-tuning.
4+
"""
5+
6+
from src.telemetry.collector import DEFAULT_MODEL_BLACKLIST, TelemetryCollector
7+
from src.telemetry.schema import TelemetryEvent
8+
from src.telemetry.storage import CloudflareKVStorage, LocalFileStorage, TelemetryStorage
9+
10+
__all__ = [
11+
"TelemetryEvent",
12+
"TelemetryCollector",
13+
"TelemetryStorage",
14+
"LocalFileStorage",
15+
"CloudflareKVStorage",
16+
"DEFAULT_MODEL_BLACKLIST",
17+
]

0 commit comments

Comments
 (0)