Skip to content

Commit 1909ab5

Browse files
authored
Merge pull request #106 from MLAI-AUS-Inc/codex/domain-scan-routing-fix
Fix article routing for content threads
2 parents 8f850af + 37d74e7 commit 1909ab5

File tree

5 files changed

+355
-14
lines changed

5 files changed

+355
-14
lines changed

roo-standalone/roo/agent.py

Lines changed: 213 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
The agent receives user messages, selects appropriate skills,
55
and executes them to generate responses.
66
"""
7+
import re
8+
from datetime import datetime, timedelta, timezone
79
from typing import Optional, Dict, Any, List
810
from pathlib import Path
911

@@ -14,6 +16,9 @@
1416
from .slack_client import get_thread_messages
1517

1618

19+
# Maximum age of a remembered thread routing context; entries whose
# "updated_at" is older than this are discarded when they are looked up.
THREAD_CONTEXT_TTL = timedelta(minutes=20)
20+
21+
1722
class RooAgent:
1823
"""
1924
Agentic Slack bot that routes requests to skills.
@@ -32,10 +37,11 @@ def __init__(self):
3237
"""Initialize the Roo agent with loaded skills."""
3338
settings = get_settings()
3439
skills_dir = Path(settings.SKILLS_DIR)
35-
40+
3641
self.skills = load_skills(skills_dir)
3742
self.skill_executor = SkillExecutor()
38-
43+
self._thread_skill_context: Dict[str, Dict[str, Any]] = {}
44+
3945
print(f"🦘 RooAgent initialized with {len(self.skills)} skills:")
4046
for skill in self.skills:
4147
print(f" - {skill.name}: {skill.description}")
@@ -86,10 +92,11 @@ async def handle_mention(
8692
return fast_result
8793

8894
# 2. Select appropriate skill (LLM Routing)
89-
skill = await self._select_skill(clean_text, thread_history, channel_id)
90-
95+
skill = await self._select_skill(clean_text, thread_history, channel_id, thread_ts)
96+
9197
if skill:
9298
print(f"🎯 Selected skill: {skill.name}")
99+
self._remember_selected_skill(skill.name, channel_id, thread_ts, clean_text)
93100
result = await self.skill_executor.execute(
94101
skill=skill,
95102
text=clean_text,
@@ -113,7 +120,178 @@ async def handle_mention(
113120
"skill_used": None,
114121
"data": None
115122
}
116-
123+
124+
def remember_thread_context(
    self,
    skill_name: str,
    channel_id: Optional[str],
    thread_ts: Optional[str],
    *,
    domain: Optional[str] = None,
    workflow: Optional[str] = None,
) -> None:
    """Record which skill is handling a thread so follow-ups can be routed to it.

    Args:
        skill_name: Name of the skill to associate with the thread.
        channel_id: Channel identifier; part of the storage key.
        thread_ts: Thread timestamp; part of the storage key.
        domain: Optional domain to store alongside the skill name.
        workflow: Optional workflow label to store alongside the skill name.

    Nothing is stored when the thread key cannot be built (missing channel
    or thread) or when ``skill_name`` is empty. An existing entry for the
    same thread is overwritten and its timestamp refreshed.
    """
    key = self._thread_key(channel_id, thread_ts)
    if key and skill_name:
        # Timestamp is timezone-aware UTC so age can be computed later.
        entry: Dict[str, Any] = dict(
            skill_name=skill_name,
            domain=domain,
            workflow=workflow,
            updated_at=datetime.now(timezone.utc),
        )
        self._thread_skill_context[key] = entry
144+
145+
def _remember_selected_skill(
146+
self,
147+
skill_name: str,
148+
channel_id: Optional[str],
149+
thread_ts: Optional[str],
150+
text: str,
151+
) -> None:
152+
workflow = None
153+
text_lower = text.lower()
154+
155+
if skill_name == "content-factory":
156+
if any(term in text_lower for term in ("scan", "codebase", "repository", "repo")):
157+
workflow = "scan"
158+
elif any(term in text_lower for term in ("scaffold", "articles directory", "blog page")):
159+
workflow = "scaffold"
160+
elif any(term in text_lower for term in ("research", "keyword", "topic")):
161+
workflow = "research"
162+
elif any(term in text_lower for term in ("write", "article", "blog")):
163+
workflow = "write"
164+
165+
self.remember_thread_context(
166+
skill_name,
167+
channel_id,
168+
thread_ts,
169+
domain=self._extract_domain(text),
170+
workflow=workflow,
171+
)
172+
173+
def _thread_key(self, channel_id: Optional[str], thread_ts: Optional[str]) -> Optional[str]:
174+
if not channel_id or not thread_ts:
175+
return None
176+
return f"{channel_id}:{thread_ts}"
177+
178+
def _get_thread_context(
    self,
    channel_id: Optional[str],
    thread_ts: Optional[str],
) -> Optional[Dict[str, Any]]:
    """Return the unexpired routing context stored for a thread, if any.

    Args:
        channel_id: Channel identifier; part of the storage key.
        thread_ts: Thread timestamp; part of the storage key.

    Returns:
        The stored context dict, or ``None`` when the key cannot be built,
        nothing is stored for the thread, or the stored entry is older than
        ``THREAD_CONTEXT_TTL`` (or has no ``updated_at``).
    """
    thread_key = self._thread_key(channel_id, thread_ts)
    if not thread_key:
        return None

    # Sweep every expired entry, not only the requested one. Otherwise
    # threads that are never looked up again would stay in the dict for
    # the lifetime of the process.
    now = datetime.now(timezone.utc)
    expired_keys = [
        key
        for key, stored in self._thread_skill_context.items()
        if not stored.get("updated_at")
        or now - stored["updated_at"] > THREAD_CONTEXT_TTL
    ]
    for key in expired_keys:
        self._thread_skill_context.pop(key, None)

    return self._thread_skill_context.get(thread_key)
197+
198+
def _get_skill_by_name(self, skill_name: str) -> Optional[Skill]:
    """Return the first skill in ``self.skills`` whose ``name`` equals ``skill_name``, else None."""
    for candidate in self.skills:
        if candidate.name == skill_name:
            return candidate
    return None
200+
201+
def _extract_domain(self, text: str) -> Optional[str]:
202+
match = re.search(r'\b(?:https?://)?([a-z0-9][a-z0-9.-]+\.[a-z]{2,})\b', text.lower())
203+
return match.group(1) if match else None
204+
205+
def _looks_like_content_request(self, text: str) -> bool:
206+
patterns = (
207+
r'\barticle\b',
208+
r'\bblog(?:\s+post)?\b',
209+
r'\bseo\b',
210+
r'\bkeyword\b',
211+
r'\btopic\b',
212+
r'\bwrite\b.*\b(article|blog(?:\s+post)?)\b',
213+
r'\bresearch\b.*\b(article|topic|keyword)\b',
214+
r'\bfor my domain\b',
215+
)
216+
return any(re.search(pattern, text) for pattern in patterns)
217+
218+
def _looks_like_points_request(self, text: str) -> bool:
219+
patterns = (
220+
r'\bpoints?\b',
221+
r'\bbalance\b',
222+
r'\bcoworking\b',
223+
r'\brewards?\b',
224+
r'\bclaim\s+task\b',
225+
r'\bcreate\s+(?:a\s+)?task\b',
226+
r'\btask\s+create\b',
227+
r'\bworth\s+\d+\s+points?\b',
228+
)
229+
return any(re.search(pattern, text) for pattern in patterns)
230+
231+
def _looks_like_content_follow_up(self, text: str) -> bool:
232+
patterns = (
233+
r'\bwrite\b',
234+
r'\bresearch\b',
235+
r'\barticle\b',
236+
r'\bblog\b',
237+
r'\bkeyword\b',
238+
r'\btopic\b',
239+
r'\bdraft\b',
240+
r'\boutline\b',
241+
r'\bfor my domain\b',
242+
)
243+
return any(re.search(pattern, text) for pattern in patterns)
244+
245+
def _keyword_matches(self, text: str, keyword: str) -> bool:
246+
keyword = keyword.lower().strip()
247+
if not keyword:
248+
return False
249+
250+
escaped = re.escape(keyword)
251+
pattern = rf'(?<!\w){escaped}(?!\w)'
252+
return re.search(pattern, text) is not None
253+
254+
def _select_skill_from_triggers(
    self,
    text: str,
    thread_context: Optional[Dict[str, Any]] = None,
) -> Optional[Skill]:
    """Pick a skill from heuristics alone, or return None when undecided.

    Checks are applied in this order:

    1. If a "content-factory" skill is loaded and the message looks like a
       content request, return it.
    2. If the thread's remembered skill is "content-factory" and the message
       looks like a content follow-up but not a points request, return it.
    3. Otherwise score each skill by its matching trigger keywords
       (``3 * word_count + character_count`` per keyword) and return the top
       scorer only if it is the sole match or leads the runner-up by at
       least 4.

    Args:
        text: Raw user message; it is lowercased and stripped before matching.
        thread_context: Remembered context for the thread, if any.
    """
    normalized = text.lower().strip()
    content_skill = self._get_skill_by_name("content-factory")

    if content_skill:
        if self._looks_like_content_request(normalized):
            return content_skill

        in_content_thread = (
            bool(thread_context)
            and thread_context.get("skill_name") == "content-factory"
        )
        if (
            in_content_thread
            and self._looks_like_content_follow_up(normalized)
            and not self._looks_like_points_request(normalized)
        ):
            return content_skill

    # Keyword scoring: longer, multi-word triggers count as stronger evidence.
    scores: Dict[str, int] = {}
    for candidate in self.skills:
        hits = [
            trigger for trigger in candidate.trigger_keywords
            if self._keyword_matches(normalized, trigger)
        ]
        if not hits:
            continue
        scores[candidate.name] = sum(len(hit.split()) * 3 + len(hit) for hit in hits)

    if not scores:
        return None

    # Stable descending sort: tied skills keep their insertion order.
    ordered = sorted(scores.items(), key=lambda pair: -pair[1])
    top_name, top_score = ordered[0]

    # Too close to call: leave the decision to the caller.
    if len(ordered) > 1 and top_score < ordered[1][1] + 4:
        return None

    return self._get_skill_by_name(top_name)
294+
117295
async def _try_fast_path(
118296
self,
119297
text: str,
@@ -274,17 +452,21 @@ def _clean_mention(self, text: str) -> str:
274452
cleaned = ' '.join(cleaned.split())
275453
return cleaned.strip()
276454

277-
async def _select_skill(self, text: str, history: List[dict] = None, channel_id: Optional[str] = None) -> Optional[Skill]:
455+
async def _select_skill(
456+
self,
457+
text: str,
458+
history: List[dict] = None,
459+
channel_id: Optional[str] = None,
460+
thread_ts: Optional[str] = None,
461+
) -> Optional[Skill]:
278462
"""Use LLM to decide which skill to use."""
279463
if not self.skills:
280464
return None
281465

282-
# First check trigger keywords for quick matching
283-
text_lower = text.lower()
284-
for skill in self.skills:
285-
for keyword in skill.trigger_keywords:
286-
if keyword.lower() in text_lower:
287-
return skill
466+
thread_context = self._get_thread_context(channel_id, thread_ts)
467+
trigger_skill = self._select_skill_from_triggers(text, thread_context)
468+
if trigger_skill:
469+
return trigger_skill
288470

289471
# Resolve channel name for priority matching
290472
channel_priority_hint = ""
@@ -314,22 +496,40 @@ async def _select_skill(self, text: str, history: List[dict] = None, channel_id:
314496
history_str = "\n".join([f"{msg.get('user')}: {msg.get('text')}" for msg in history[:-1]]) # Skip last as it's the current request usually
315497
history_context = f"Conversation History:\n{history_str}\n"
316498

499+
thread_context_hint = ""
500+
if thread_context:
501+
thread_context_hint = (
502+
"Active Thread Context:\n"
503+
f"- last skill: {thread_context.get('skill_name')}\n"
504+
f"- domain: {thread_context.get('domain') or 'unknown'}\n"
505+
f"- workflow: {thread_context.get('workflow') or 'unknown'}\n"
506+
)
507+
317508
prompt = f"""You are a skill router. Given the user's message and conversation context, decide which skill to use.
318509
319510
Available skills:
320511
{skill_descriptions}
321512
- none: Use this if no skill is appropriate (general conversation)
322513
{channel_priority_hint}
323514
{history_context}
515+
{thread_context_hint}
324516
User message: "{text}"
325517
518+
Prefer content-factory for requests about writing, researching, or planning articles, blog posts, SEO topics, keywords, or content for a domain.
519+
520+
Examples:
521+
- "please research the best article for me to write" -> content-factory
522+
- "write me an article about how to build an ai agent harness for long-running specific tasks" -> content-factory
523+
- "create a task called fix docs worth 5 points" -> mlai-points
524+
326525
Respond with ONLY the skill name (e.g., "connect_users" or "none"):"""
327526

328527
try:
528+
settings = get_settings()
329529
response = await chat([
330530
{"role": "system", "content": "You are a skill router. Respond with only the skill name."},
331531
{"role": "user", "content": prompt}
332-
])
532+
], model=settings.ROUTER_MODEL, max_tokens=32, reasoning_effort="low")
333533

334534
skill_name = response.content.strip().lower()
335535
# Normalize: both underscores and hyphens should match

roo-standalone/roo/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ class Settings(BaseSettings):
4242
LOG_LEVEL: str = "INFO"
4343
SKILLS_DIR: str = "skills"
4444
TIMEZONE: str = "Australia/Melbourne"
45-
45+
ROUTER_MODEL: str = "gpt-5.4"
46+
4647
@property
4748
def default_llm_provider(self) -> str:
4849
"""Determine default LLM provider based on available keys."""

roo-standalone/roo/main.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,28 @@
2121
_pending_intents: dict = {}
2222

2323

24+
def _remember_content_thread_context(
25+
channel_id: str | None,
26+
thread_ts: str | None,
27+
domain: str | None,
28+
workflow: str,
29+
) -> None:
30+
"""Keep content-factory as the active skill for follow-ups in this thread."""
31+
if not channel_id or not thread_ts:
32+
return
33+
34+
try:
35+
get_agent().remember_thread_context(
36+
"content-factory",
37+
channel_id,
38+
thread_ts,
39+
domain=domain,
40+
workflow=workflow,
41+
)
42+
except Exception as e:
43+
print(f"⚠️ Failed to persist content thread context: {e}")
44+
45+
2446
async def _medhack_daily_case_loop():
2547
"""Background task that posts a new diagnosis case each day."""
2648
import asyncio
@@ -787,6 +809,7 @@ async def content_factory_callback(request: Request):
787809
pillar_names = payload.get("pillar_names")
788810

789811
print(f"📦 Scan complete for {domain}: {components_count} components, {pillar_count} pillars")
812+
_remember_content_thread_context(channel_id, thread_ts, domain, "scan")
790813

791814
if components_count > 0:
792815
# Build component summary: "ArticleHeroHeader, ArticleCard, ArticleFAQ, +27 more"
@@ -954,6 +977,7 @@ async def content_factory_callback(request: Request):
954977
build_verified = payload.get("build_verified", False)
955978

956979
print(f"📁 Scaffold complete for {domain}: PR={pr_url}")
980+
_remember_content_thread_context(channel_id, thread_ts, domain, "scaffold")
957981

958982
if already_exists:
959983
blocks = [

0 commit comments

Comments
 (0)