Skip to content

Commit e006f32

Browse files
committed
feat: default critic=o3-pro when available; expand persona scenarios; add eval-personas Makefile target
1 parent 9fafa15 commit e006f32

File tree

3 files changed

+66
-0
lines changed

3 files changed

+66
-0
lines changed

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,7 @@ hooks:
3232
clean:
3333
rm -rf $(VENV) __pycache__ .pytest_cache
3434
find . -name "*.pyc" -delete
35+
# eval-personas: run the persona scenario dataset through the orbit_agent CLI
# (`eval run`), then invoke `eval report` and `eval summary` on the resulting
# JSONL file. The summary step is given CSV and Markdown output paths under
# reports/.
#
# Declared phony because the target names a task, not a file: without this, a
# file or directory called "eval-personas" would make the target look up to
# date and the recipe would never run.
.PHONY: eval-personas
eval-personas:
	@# Ensure both output directories exist before anything writes into them;
	@# harmless if the CLI already creates them itself (not visible from here).
	@mkdir -p .orbit/evals reports
	@$(VENV)/bin/python -m orbit_agent.cli eval run --dataset evals/scenarios_personas.yaml --out .orbit/evals/personas.jsonl
	@$(VENV)/bin/python -m orbit_agent.cli eval report .orbit/evals/personas.jsonl
	@$(VENV)/bin/python -m orbit_agent.cli eval summary --input-path .orbit/evals/personas.jsonl --csv-out reports/personas.csv --md-out reports/personas.md

evals/scenarios_personas.yaml

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,58 @@ scenarios:
5353
- "Pilot offer: specific terms and success criteria"
5454
- "Objection-handling: equips AE with scripts"
5555
- "Analysis: before/after unit economics"
56+
57+
- id: devtools_oss_adoption
58+
persona: "Founding engineer, OSS devtools"
59+
stage: "Community to paid"
60+
prompt: "We have 2k GitHub stars and 50 weekly PRs, but only 6 paid teams. What do we do in the next 10 days to convert 10 teams?"
61+
playbook: playbooks/high_orbit.yaml
62+
rubric:
63+
- "ICP definition: team profile and usage threshold"
64+
- "Value hooks: enterprise features or SLAs anchored to pains"
65+
- "Sales motion: contributor → champion path and scripts"
66+
- "Proof: timeboxed pilots with acceptance criteria"
67+
68+
- id: fintech_compliance_blocker
69+
persona: "Founder-CEO, fintech"
70+
stage: "Pilot to production"
71+
prompt: "2 banks stalled on compliance (SOC2, data residency). What can we do this week to unblock one and create momentum?"
72+
playbook: playbooks/high_orbit.yaml
73+
rubric:
74+
- "Blocker mapping: exact policy gaps and owners"
75+
- "Mitigations: short-term controls and roadmap"
76+
- "Exec alignment: risk framing and tradeoffs"
77+
- "Timeline: mutual action plan with deadlines"
78+
79+
- id: healthcare_baa_go_to_market
80+
persona: "Founder, healthcare SaaS"
81+
stage: "First 5 logos"
82+
prompt: "We’re HIPAA-ready but no BAAs signed yet. How do we land 3 design partners in 21 days?"
83+
playbook: playbooks/high_orbit.yaml
84+
rubric:
85+
- "Segmenting: clinics vs. mid-market, buyer roles"
86+
- "Offer: BAA terms and pilot success definition"
87+
- "Workflow proof: EHR integrations or mock flows"
88+
- "References: seed a reference loop"
89+
90+
- id: marketplace_cold_start
91+
persona: "Marketplace founder"
92+
stage: "Cold-start"
93+
prompt: "We have 200 supply signups and 40 demand signups. In 2 weeks, how do we get 20 transactions and avoid the chicken-and-egg?"
94+
playbook: playbooks/high_orbit.yaml
95+
rubric:
96+
- "Wedge: one subcategory with clear match rules"
97+
- "Liquidity tactics: subsidies, guarantees, or batching"
98+
- "Ops plan: manual matching or concierge"
99+
- "Measurement: repeat usage indicator"
100+
101+
- id: ml_infra_pilot_to_contract
102+
persona: "Founder, ML infra"
103+
stage: "Pilots → paid"
104+
prompt: "We have 3 pilot users with POCs running. What do we do in 10 days to convert 2 to paid?"
105+
playbook: playbooks/high_orbit.yaml
106+
rubric:
107+
- "Value quant: baseline cost/latency vs. after"
108+
- "Cut-list: deprioritize features for closed-won"
109+
- "Close plan: exec sponsor and legal blockers"
110+
- "Pricing: aligns to usage/value with clear limits"

orbit_agent/config.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,13 @@ def load_config() -> AppConfig:
190190
overlap_alpha=float(os.getenv("ORBIT_OVERLAP_ALPHA", "2.0")),
191191
)
192192

193+
# Sensible default critic: prefer OpenAI o3-pro if no explicit critic set
194+
try:
195+
if config.critic_model is None and config.lm.model.startswith("openai/"):
196+
config.critic_model = "openai/o3-pro"
197+
except Exception:
198+
pass
199+
193200
return config
194201

195202

0 commit comments

Comments
 (0)