Skip to content

Commit 74f6f2c

Browse files
committed
Revert "reasoning tokens and quoting"
This reverts commit a20dee9.
1 parent 857ba0a commit 74f6f2c

File tree

4 files changed

+8
-14
lines changed

4 files changed

+8
-14
lines changed

src/eva/assistant/agentic/system.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,14 +201,13 @@ async def _run_tool_loop(
201201
"response": response_content,
202202
"prompt_tokens": llm_stats.get("prompt_tokens", 0),
203203
"output_tokens": llm_stats.get("completion_tokens", 0),
204-
"reasoning_tokens": llm_stats.get("reasoning_tokens", 0),
205204
"cost": llm_stats.get("cost", 0.0),
206205
"cost_source": llm_stats.get("cost_source", "unknown"),
207206
"stop_reason": llm_stats.get("finish_reason", "unknown"),
208207
"latency": llm_stats.get("latency", 0.0),
209208
"parameters": json.dumps(llm_stats.get("parameters", {})),
210209
"tool_calls": json.dumps(response_tool_calls_for_stats) if response_tool_calls_for_stats else "",
211-
"reasoning": f'"{llm_stats.get("reasoning", "")}"',
210+
"reasoning": llm_stats.get("reasoning_content", ""),
212211
}
213212
self.agent_perf_stats.append(perf_stat)
214213
logger.debug(
@@ -376,16 +375,15 @@ def save_agent_perf_stats(self) -> None:
376375
fieldnames = [
377376
"prompt",
378377
"response",
379-
"reasoning",
380378
"prompt_tokens",
381379
"output_tokens",
382-
"reasoning_tokens",
383380
"cost",
384381
"cost_source",
385382
"stop_reason",
386383
"parameters",
387384
"tool_calls",
388385
"latency",
386+
"reasoning"
389387
]
390388
writer = csv.DictWriter(f, fieldnames=fieldnames)
391389
writer.writeheader()

src/eva/assistant/pipeline/alm_vllm.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,12 +200,10 @@ async def complete(
200200

201201
# Extract reasoning if present (OpenAI o1 and compatible models)
202202
reasoning = getattr(message, "reasoning_content", None)
203-
reasoning_tokens = getattr(usage, "reasoning_tokens", 0) if usage else 0
204203

205204
stats = {
206205
"prompt_tokens": usage.prompt_tokens if usage else 0,
207206
"completion_tokens": usage.completion_tokens if usage else 0,
208-
"reasoning_tokens": reasoning_tokens,
209207
"finish_reason": response.choices[0].finish_reason or "unknown",
210208
"model": response.model or self.model,
211209
"cost": 0.0, # Self-hosted, no API cost

src/eva/assistant/services/llm.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ async def complete(
7474
usage = getattr(response, "usage", None)
7575
prompt_tokens = getattr(usage, "prompt_tokens", 0) if usage else 0
7676
completion_tokens = getattr(usage, "completion_tokens", 0) if usage else 0
77-
reasoning_tokens = getattr(usage, "reasoning_tokens", 0) if usage else 0
7877
finish_reason = getattr(response.choices[0], "finish_reason", "unknown")
7978
model = getattr(response, "model", self.model)
8079
hidden_params = getattr(response, "_hidden_params", {}) or {}
@@ -87,7 +86,6 @@ async def complete(
8786
stats = {
8887
"prompt_tokens": prompt_tokens,
8988
"completion_tokens": completion_tokens,
90-
"reasoning_tokens": reasoning_tokens,
9189
"finish_reason": finish_reason,
9290
"model": model,
9391
"cost": response_cost,

tests/unit/assistant/test_agentic_system.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ async def test_simple_response(self):
6767
llm_client.complete = AsyncMock(
6868
return_value=(
6969
_make_llm_response("Hello, how can I help you?"),
70-
{"prompt_tokens": 10, "completion_tokens": 5, "reasoning_tokens": 0, "finish_reason": "stop"},
70+
{"prompt_tokens": 10, "completion_tokens": 5, "finish_reason": "stop"},
7171
)
7272
)
7373

@@ -117,11 +117,11 @@ async def test_single_tool_call_then_response(self):
117117
side_effect=[
118118
(
119119
_make_llm_response("What if there is text here", tool_calls=[tool_call]),
120-
{"prompt_tokens": 20, "completion_tokens": 10, "reasoning_tokens": 0, "finish_reason": "tool_calls"},
120+
{"prompt_tokens": 20, "completion_tokens": 10, "finish_reason": "tool_calls"},
121121
),
122122
(
123123
_make_llm_response("Your reservation ABC123 is confirmed."),
124-
{"prompt_tokens": 30, "completion_tokens": 15, "reasoning_tokens": 0, "finish_reason": "stop"},
124+
{"prompt_tokens": 30, "completion_tokens": 15, "finish_reason": "stop"},
125125
),
126126
]
127127
)
@@ -204,11 +204,11 @@ async def test_tool_call_with_error_result(self):
204204
side_effect=[
205205
(
206206
_make_llm_response("", tool_calls=[tool_call]),
207-
{"prompt_tokens": 20, "completion_tokens": 10, "reasoning_tokens": 0, "finish_reason": "tool_calls"},
207+
{"prompt_tokens": 20, "completion_tokens": 10, "finish_reason": "tool_calls"},
208208
),
209209
(
210210
_make_llm_response("I couldn't find that reservation."),
211-
{"prompt_tokens": 30, "completion_tokens": 10, "reasoning_tokens": 0, "finish_reason": "stop"},
211+
{"prompt_tokens": 30, "completion_tokens": 10, "finish_reason": "stop"},
212212
),
213213
]
214214
)
@@ -291,7 +291,7 @@ async def test_transfer_to_agent(self):
291291
llm_client.complete = AsyncMock(
292292
return_value=(
293293
_make_llm_response("", tool_calls=[tool_call]),
294-
{"prompt_tokens": 20, "completion_tokens": 5, "reasoning_tokens": 0, "finish_reason": "tool_calls"},
294+
{"prompt_tokens": 20, "completion_tokens": 5, "finish_reason": "tool_calls"},
295295
)
296296
)
297297

0 commit comments

Comments (0)