Avoid misclassifying dates as math and fix mock LM question parsing

haasonsaas · haasonsaas · commit 83f359c78ed2 · 2025-12-22T11:22:32.000-08:00
diff --git a/micro_agent/agent.py b/micro_agent/agent.py
@@ -163,10 +163,17 @@ def _normalize_text(q: str) -> str:
                 .replace("\u2014", "-")
             )
 
+        def _strip_date_literals(q: str) -> str:
+            # Replace common date literals with a DATE placeholder so they are not misclassified as math (e.g., 2025-12-22).
+            q = re.sub(r"\b\d{4}[-/]\d{1,2}[-/]\d{1,2}\b", " DATE ", q)
+            q = re.sub(r"\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b", " DATE ", q)
+            return q
+
         def needs_math(q: str) -> bool:
             qn = _normalize_text(q)
+            qn_math = _strip_date_literals(qn)
             ql = qn.lower()
-            if re.search(r"[0-9].*[+\-*/%]", qn):
+            if re.search(r"[0-9].*[+\-*/%]", qn_math):
                 return True
             if re.search(r"\b\d+(?:\.\d+)?\s*(?:x|times|multiplied by)\s*\d+(?:\.\d+)?\b", ql):
                 return True
@@ -232,7 +239,7 @@ def _accumulate_usage(input_text: str = "", output_text: str = ""):
                 pass
 
         def _infer_expression(q: str) -> str:
-            qn = _normalize_text(q)
+            qn = _strip_date_literals(_normalize_text(q))
             ql = qn.lower()
             # Handle "divide X by Y" and "subtract X from Y"
             m = re.search(r"\bdivide\s+(\d+(?:\.\d+)?)\s+by\s+(\d+(?:\.\d+)?)\b", ql)
diff --git a/micro_agent/config.py b/micro_agent/config.py
@@ -31,17 +31,23 @@ class _MockLM:
             model = "mock/local"
             def __call__(self, *, prompt: str, **kwargs):
                 import re, json as _json
-                qmatch = re.search(r"Question:\s*(.*)", prompt, re.S)
-                question = qmatch.group(1).strip() if qmatch else prompt
+                qmatch = re.search(r"\nQuestion:\s*(.*?)\n\nState:", prompt, re.S)
+                if qmatch:
+                    question = qmatch.group(1).strip()
+                else:
+                    qs = re.findall(r"\bQuestion:\s*(.*)", prompt)
+                    question = qs[-1].strip() if qs else prompt
                 qn = (question
                       .replace("\u00d7", "x")
                       .replace("\u00f7", "/")
                       .replace("\u2212", "-")
                       .replace("\u2013", "-")
                       .replace("\u2014", "-"))
+                qn_math = re.sub(r"\b\d{4}[-/]\d{1,2}[-/]\d{1,2}\b", " DATE ", qn)
+                qn_math = re.sub(r"\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b", " DATE ", qn_math)
                 ql = qn.lower()
                 # heuristic: suggest calculator/now/final
-                if (re.search(r"[0-9].*[+\-*/%]", qn) or
+                if (re.search(r"[0-9].*[+\-*/%]", qn_math) or
                     re.search(r"\b\d+(?:\.\d+)?\s*(?:x|times|multiplied by|plus|minus|add|added to|subtract|subtracted by|divide|divided by|over)\s*\d+(?:\.\d+)?\b", ql) or
                     (re.search(r"\d", ql) and any(w in ql for w in [
                         "add","sum","plus","minus","subtract","multiply","divide","total","power","factorial","compute","calculate","!","**","^"
@@ -62,9 +68,13 @@ def __call__(self, *, prompt: str, **kwargs):
                         m = re.search(r"\b(\d+(?:\.\d+)?)\s*(?:divide|divided by|over)\s*(\d+(?:\.\d+)?)\b", ql)
                         if m:
                             expr = f"{m.group(1)}/{m.group(2)}"
+                    if expr is None and ("add" in ql or "sum" in ql):
+                        nums = re.findall(r"\b\d+(?:\.\d+)?\b", qn_math)
+                        if len(nums) >= 2:
+                            expr = "+".join(nums)
                     # crude expression extraction fallback
                     if expr is None:
-                        cands = re.findall(r"[0-9\+\-\*/%\(\)\.!\^\s]+", qn)
+                        cands = re.findall(r"[0-9\+\-\*/%\(\)\.!\^\s]+", qn_math)
                         cands = [c.strip() for c in cands if c.strip()]
                         expr = max(cands, key=len) if cands else "2+2"
                     return _json.dumps({"tool": {"name": "calculator", "args": {"expression": expr}}})
diff --git a/tests/test_regressions.py b/tests/test_regressions.py
@@ -66,3 +66,21 @@ def test_now_local_has_offset():
 def test_now_invalid_timezone_validation():
     obs = run_tool("now", {"timezone": "pst"})
     assert "error" in obs and "validation" in obs["error"]
+
+
+def test_date_like_not_math(monkeypatch):
+    monkeypatch.setenv("LLM_PROVIDER", "mock")
+    configure_lm()
+    agent = MicroAgent(max_steps=3)
+    pred = agent("What's the date 2025-12-22 in UTC?")
+    assert any(step.get("tool") == "now" for step in (pred.trace or []))
+    assert not any(step.get("tool") == "calculator" for step in (pred.trace or []))
+
+
+def test_math_with_date_still_math(monkeypatch):
+    monkeypatch.setenv("LLM_PROVIDER", "mock")
+    configure_lm()
+    agent = MicroAgent(max_steps=3)
+    pred = agent("Add 2 and 2 on 2025-12-22.")
+    assert any(step.get("tool") == "calculator" for step in (pred.trace or []))
+    assert "4" in pred.answer