
Commit 023d220

Fix tool-call policy, parsing, and config fallbacks
1 parent cfd2d7a commit 023d220

6 files changed: +146 -28 lines changed


micro_agent/agent.py

Lines changed: 51 additions & 10 deletions
@@ -19,11 +19,7 @@ def __init__(self, max_steps: int = 6, use_tool_calls: bool | None = None):
         self.finalize = None  # fallback finalize handled via LM prompt
         self._tool_list = [t.spec() for t in TOOLS.values()]
         self.max_steps = max_steps
-        self._provider = None
-        try:
-            self._provider = (self.lm.model.split("/", 1)[0] if getattr(self.lm, "model", None) else None)
-        except Exception:
-            self._provider = None
+        self._provider = self._infer_provider(self.lm)
         # Determine function-calls mode
         env_override = os.getenv("USE_TOOL_CALLS")
         if isinstance(use_tool_calls, bool):
@@ -37,10 +33,36 @@ def __init__(self, max_steps: int = 6, use_tool_calls: bool | None = None):
         try:
             from dspy.adapters import JSONAdapter
             dspy.settings.configure(adapter=JSONAdapter())
+            if to_dspy_tools():
+                self.planner = dspy.Predict(PlanWithTools)
+                self._load_compiled_demos()
+            else:
+                self._use_tool_calls = False
         except Exception:
-            pass
-        self.planner = dspy.Predict(PlanWithTools)
-        self._load_compiled_demos()
+            self._use_tool_calls = False
+
+    def _infer_provider(self, lm) -> str | None:
+        try:
+            prov = getattr(lm, "provider", None) or getattr(lm, "_provider", None)
+            if isinstance(prov, str) and prov.strip():
+                return prov.strip().lower()
+        except Exception:
+            pass
+        try:
+            cls_name = lm.__class__.__name__.lower()
+            if "openai" in cls_name:
+                return "openai"
+            if "ollama" in cls_name:
+                return "ollama"
+        except Exception:
+            pass
+        try:
+            model = getattr(lm, "model", None)
+            if isinstance(model, str) and "/" in model:
+                return model.split("/", 1)[0].lower()
+        except Exception:
+            pass
+        return None

     def _load_compiled_demos(self):
         import json as _json
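
For reference, the new _infer_provider prefers an explicit provider attribute, then falls back to the LM class name, then to a provider/model prefix in the model string. A minimal sketch of that precedence, assuming the package is importable and using throwaway SimpleNamespace objects as stand-ins for real LMs:

from types import SimpleNamespace

from micro_agent.agent import MicroAgent

# _infer_provider never touches self, so passing None works for illustration.
infer = MicroAgent._infer_provider
print(infer(None, SimpleNamespace(provider="OpenAI")))      # "openai"  (explicit attribute wins)
print(infer(None, SimpleNamespace(model="ollama/llama3")))  # "ollama"  (provider/model prefix)
print(infer(None, SimpleNamespace(model="gpt-4o-mini")))    # None      (nothing to infer)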
@@ -142,7 +164,9 @@ def needs_math(q: str) -> bool:

 def needs_time(q: str) -> bool:
     ql = q.lower()
-    return any(w in ql for w in ["time", "date", "utc", "current time", "now"])
+    if "current time" in ql or "current date" in ql:
+        return True
+    return re.search(r"\b(time|times|date|dates|utc|now|today|tomorrow|yesterday|timestamp|datetime)\b", ql) is not None

 def used_tool(state, name: str) -> bool:
     return any(step.get("tool") == name for step in state)
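
The word-boundary regex stops substrings such as "update" from triggering the now tool. A quick illustration, restating the pattern from this hunk:

import re

TIME_RE = re.compile(r"\b(time|times|date|dates|utc|now|today|tomorrow|yesterday|timestamp|datetime)\b")

print(bool(TIME_RE.search("please update the docs")))    # False -- "update" no longer matches "date"
print(bool(TIME_RE.search("what is the date today?")))   # True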
@@ -216,8 +240,19 @@ def _accumulate_usage(input_text: str = "", output_text: str = ""):
            # If tool calls are proposed, execute them.
            calls = getattr(pred, 'tool_calls', None)
            executed_any = False
+            had_validation_error = False
+            had_policy_violation = False
            if calls and getattr(calls, 'tool_calls', None):
-                for call in calls.tool_calls:
+                call_list = list(calls.tool_calls)
+                if len(call_list) > 1:
+                    had_policy_violation = True
+                    state.append({
+                        "tool": "⛔️policy_violation",
+                        "args": {"reason": "multiple_tool_calls", "count": len(call_list)},
+                        "observation": "Model returned multiple tool calls in one step; executing only the first.",
+                    })
+                    call_list = call_list[:1]
+                for call in call_list:
                    try:
                        name = getattr(call, 'name')
                        args = getattr(call, 'args') or {}
@@ -226,6 +261,7 @@ def _accumulate_usage(input_text: str = "", output_text: str = ""):
                        # Validate/execute; on validation error, record and continue planning
                        obs = run_tool(name, args)
                        if isinstance(obs, dict) and "error" in obs and "validation" in obs.get("error", ""):
+                            had_validation_error = True
                            state.append({
                                "tool": "⛔️validation_error",
                                "args": {"name": name, "args": args},
@@ -239,6 +275,8 @@ def _accumulate_usage(input_text: str = "", output_text: str = ""):
            # Check finalization.
            final = getattr(pred, 'final', None)
            if final:
+                if had_policy_violation or had_validation_error:
+                    continue
                if must_math and not used_tool(state, "calculator"):
                    state.append({"tool": "⛔️policy_violation", "args": {}, "observation": "Finalize before calculator (OpenAI path)."})
            # If tools were suggested and executed this step, iterate; else force tool suggestion by continuing.
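
Taken together, the checks above enforce one tool call per planning step and defer finalization whenever a policy violation or validation error was recorded in that step. The shape of the truncation rule, as a standalone sketch (plain dicts standing in for the agent's real call objects):

# Hypothetical proposed calls; only the first is kept and the trimming is logged.
call_list = [{"name": "calculator"}, {"name": "now"}]
state = []
if len(call_list) > 1:
    state.append({
        "tool": "⛔️policy_violation",
        "args": {"reason": "multiple_tool_calls", "count": len(call_list)},
        "observation": "Model returned multiple tool calls in one step; executing only the first.",
    })
    call_list = call_list[:1]
assert [c["name"] for c in call_list] == ["calculator"]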
@@ -399,6 +437,9 @@ def _accumulate_usage(input_text: str = "", output_text: str = ""):
            "tool_calls": tool_calls,
            "provider": self._provider,
            "model": getattr(self.lm, "model", None),
+            "cost": total_cost,
+            "input_tokens": total_in_tokens,
+            "output_tokens": total_out_tokens,
        }
        return p

micro_agent/config.py

Lines changed: 10 additions & 5 deletions
@@ -43,7 +43,8 @@ def __call__(self, *, prompt: str, **kwargs):
                cands = [c.strip() for c in cands if c.strip()]
                expr = max(cands, key=len) if cands else "2+2"
                return _json.dumps({"tool": {"name": "calculator", "args": {"expression": expr}}})
-            if any(w in ql for w in ["time","date","utc","current time","now"]):
+            if ("current time" in ql or "current date" in ql or
+                    re.search(r"\b(time|times|date|dates|utc|now|today|tomorrow|yesterday|timestamp|datetime)\b", ql)):
                return _json.dumps({"tool": {"name": "now", "args": {"timezone": "utc"}}})
            return _json.dumps({"final": {"answer": "ok"}})
        dspy.settings.configure(lm=_MockLM(), track_usage=True)
@@ -75,10 +76,14 @@ def _try(name, fn):

    # Option 2: OpenAI (default)
    openai_model = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
-    if _try("dspy.OpenAI", lambda: dspy.OpenAI(model=openai_model, temperature=temperature, max_tokens=max_tokens)):
-        return
-    if _try("dspy.LM(openai/<model>)", lambda: dspy.LM(f"openai/{openai_model}")):
-        return
+    openai_key = os.getenv("OPENAI_API_KEY")
+    if openai_key:
+        if _try("dspy.OpenAI", lambda: dspy.OpenAI(model=openai_model, temperature=temperature, max_tokens=max_tokens)):
+            return
+        if _try("dspy.LM(openai/<model>)", lambda: dspy.LM(f"openai/{openai_model}")):
+            return
+    else:
+        tried.append(("openai", "missing OPENAI_API_KEY"))

    # If we got here, all backends failed: fall back to mock
    class _FallbackMockLM:
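
With this change the OpenAI path is skipped outright when OPENAI_API_KEY is unset instead of failing inside _try. For offline runs, the mock backend can be selected explicitly, mirroring the regression test added below (environment variable names are taken from this commit):

import os

from micro_agent.config import configure_lm
from micro_agent.agent import MicroAgent

os.environ["LLM_PROVIDER"] = "mock"   # skip the real-backend probing
configure_lm()                        # configures dspy with the built-in mock LM

agent = MicroAgent(max_steps=2)
pred = agent("what is the current time in utc?")  # the mock heuristics should route this to the `now` tool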

micro_agent/runtime.py

Lines changed: 32 additions & 6 deletions
@@ -44,16 +44,40 @@ def dump_trace(trace_id: str, question: str, steps: List[Step], answer: str, *,
        f.write(json.dumps(rec, ensure_ascii=False) + "\n")
    return path

-_JSON_RE = re.compile(r"\{.*\}", re.S)
-
 def extract_json_block(text: str) -> str:
    """
    Extract the first {...} block to survive models adding prose or code fences.
    """
-    m = _JSON_RE.search(text)
-    if not m:
-        raise ValueError(f"No JSON object found in: {text[:200]!r}")
-    return m.group(0)
+    if not text:
+        raise ValueError("No JSON object found in empty text")
+    start = None
+    depth = 0
+    in_str = False
+    escape = False
+    for i, ch in enumerate(text):
+        if start is None:
+            if ch == "{":
+                start = i
+                depth = 1
+            continue
+        if in_str:
+            if escape:
+                escape = False
+            elif ch == "\\":
+                escape = True
+            elif ch == "\"":
+                in_str = False
+            continue
+        if ch == "\"":
+            in_str = True
+            continue
+        if ch == "{":
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+            if depth == 0:
+                return text[start:i + 1]
+    raise ValueError(f"No JSON object found in: {text[:200]!r}")

 def parse_decision_text(text: str) -> Dict[str, Any]:
    """Parse a model decision string into a dict.
@@ -73,6 +97,8 @@ def parse_decision_text(text: str) -> Dict[str, Any]:
    if json_repair is not None:
        try:
            repaired = json_repair.repair(block)
+            if isinstance(repaired, dict):
+                return repaired
            return json.loads(repaired)
        except Exception:
            pass
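
The brace-matching scanner replaces the old greedy regex, so prose before or after the object, even prose containing stray braces, no longer corrupts the extraction. A quick usage example, assuming the module is importable:

from micro_agent.runtime import extract_json_block

text = 'Decision: {"final": {"answer": "4"}} -- note the {stray} braces in the trailing prose'
print(extract_json_block(text))   # {"final": {"answer": "4"}}
# The old pattern r"\{.*\}" with re.S would have matched through to the last "}" and returned invalid JSON.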

micro_agent/signatures.py

Lines changed: 8 additions & 1 deletion
@@ -1,6 +1,13 @@
 from __future__ import annotations
 import dspy
-from dspy.adapters import Tool as DSpyTool, ToolCalls
+try:
+    from dspy.adapters import Tool as DSpyTool, ToolCalls
+except Exception:
+    try:
+        from dspy.adapters.types import Tool as DSpyTool, ToolCalls  # type: ignore
+    except Exception:
+        DSpyTool = object  # type: ignore
+        ToolCalls = object  # type: ignore

 class PlanOrAct(dspy.Signature):
    """Decide next step: either call a tool with JSON args or finalize.

micro_agent/tools.py

Lines changed: 22 additions & 6 deletions
@@ -28,7 +28,7 @@ def spec(self) -> Dict[str, Any]:
 ALLOWED_CALLS = {"fact": lambda x: math.factorial(int(x))}
 def _eval_expr(node):
    # Python 3.10+: numeric literals appear as ast.Constant
-    if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
+    if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)) and not isinstance(node.value, bool):
        return node.value
    if isinstance(node, ast.BinOp) and type(node.op) in ALLOWED_OPS:
        lv, rv = _eval_expr(node.left), _eval_expr(node.right)
@@ -37,18 +37,31 @@ def _eval_expr(node):
        if isinstance(node.op, ast.Pow):
            if isinstance(rv, (int, float)) and abs(rv) > MAX_EXPONENT:
                raise ValueError("exponent too large")
-        return ALLOWED_OPS[type(node.op)](lv, rv)
+        result = ALLOWED_OPS[type(node.op)](lv, rv)
+        if isinstance(result, complex):
+            raise ValueError("complex results are not supported")
+        return result
    if isinstance(node, ast.UnaryOp) and type(node.op) in ALLOWED_OPS:
        v = _eval_expr(node.operand)
        if isinstance(v, (int, float)) and abs(v) > MAX_ABS_NUMBER: raise ValueError("number too large")
-        return ALLOWED_OPS[type(node.op)](v)
+        result = ALLOWED_OPS[type(node.op)](v)
+        if isinstance(result, complex):
+            raise ValueError("complex results are not supported")
+        return result
    if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id in ALLOWED_CALLS:
        if len(node.args) != 1:
            raise ValueError("Invalid arguments")
        arg = _eval_expr(node.args[0])
-        if isinstance(arg, (int, float)) and arg > MAX_FACTORIAL_N:
+        if not isinstance(arg, (int, float)) or isinstance(arg, bool):
+            raise ValueError("factorial requires a number")
+        if isinstance(arg, float) and not arg.is_integer():
+            raise ValueError("factorial requires an integer")
+        arg_int = int(arg)
+        if arg_int < 0:
+            raise ValueError("factorial requires a non-negative integer")
+        if arg_int > MAX_FACTORIAL_N:
            raise ValueError("factorial too large")
-        return ALLOWED_CALLS[node.func.id](arg)
+        return ALLOWED_CALLS[node.func.id](arg_int)
    if isinstance(node, ast.Expression): return _eval_expr(node.body)
    raise ValueError("Disallowed expression")
@@ -68,7 +81,10 @@ def safe_eval_math(expr: str) -> float:
    # cap complexity
    if sum(1 for _ in ast.walk(tree)) > MAX_ALLOWED_OPS_NODES:
        raise ValueError("expression too complex")
-    return _eval_expr(tree)
+    result = _eval_expr(tree)
+    if isinstance(result, complex):
+        raise ValueError("complex results are not supported")
+    return result

 def tool_calculator(args: Dict[str, Any]):
    expr = str(args.get("expression", "")).strip()
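
The hardened evaluator now rejects booleans posing as numbers, complex results, and non-integer or negative factorial arguments. A few illustrative calls, mirroring the new regression tests:

from micro_agent.tools import safe_eval_math

print(safe_eval_math("2+2"))   # 4

for bad in ("fact(3.5)", "(-1) ** 0.5"):   # non-integer factorial; complex result
    try:
        safe_eval_math(bad)
    except ValueError as exc:
        print(f"{bad!r} rejected: {exc}")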

tests/test_regressions.py

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+import pytest
+
+from micro_agent.config import configure_lm
+from micro_agent.agent import MicroAgent
+from micro_agent.tools import safe_eval_math
+
+
+def test_no_false_time_trigger_on_update(monkeypatch):
+    monkeypatch.setenv("LLM_PROVIDER", "mock")
+    configure_lm()
+    agent = MicroAgent(max_steps=2)
+    pred = agent("Please update the docs.")
+    assert not any(step.get("tool") == "now" for step in (pred.trace or []))
+
+
+def test_factorial_rejects_non_integer():
+    with pytest.raises(ValueError):
+        safe_eval_math("fact(3.5)")
+
+
+def test_complex_results_rejected():
+    with pytest.raises(ValueError):
+        safe_eval_math("(-1)^(0.5)")
