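Rename ActionResult to just ActResult

Also rename the related `action_*` local variables and tests to use
agent/act naming, and handle invalid tool call requests that are
received on the final prediction round.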

ncoghlan · ncoghlan · commit 355598efe2a7 · 2025-02-27T21:01:27.000+10:00
diff --git a/src/lmstudio/json_api.py b/src/lmstudio/json_api.py
@@ -123,6 +123,7 @@
 # explicitly via `lmstudio.json_api`, it isn't exported
 # implicitly as part of the top-level `lmstudio` API.
 __all__ = [
+    "ActResult",
     "AnyModelSpecifier",
     "EmbeddingModelInfo",
     "EmbeddingModelInstanceInfo",
@@ -151,7 +152,6 @@
     "ModelSpecifierDict",
     "ModelQuery",
     "ModelQueryDict",
-    "ActionResult",
     "PredictionResult",
     "PredictionRoundResult",
     "SerializedLMSExtendedError",
@@ -471,7 +471,7 @@ def from_result(cls, result: PredictionResult[str], round_index: int) -> Self:
 
 
 @dataclass(kw_only=True, frozen=True, slots=True)
-class ActionResult:
+class ActResult:
     """Summary of a completed multi-round tool using action."""
 
     # Detailed action results are reported via callbacks (for now)
diff --git a/src/lmstudio/sync_api.py b/src/lmstudio/sync_api.py
@@ -53,7 +53,7 @@
     _ToolCallRequest,
 )
 from .json_api import (
-    ActionResult,
+    ActResult,
     AnyModelSpecifier,
     AvailableModelBase,
     ChannelEndpoint,
@@ -1559,14 +1559,14 @@ def act(
             [LMStudioPredictionError, _ToolCallRequest | None], str
         ]
         | None = None,
-    ) -> ActionResult:
-        """Request a response (with implicit tool use) in an ongoing assistant chat session."""
-        action_start_time = time.perf_counter()
+    ) -> ActResult:
+        """Request a response (with implicit tool use) in an ongoing agent chat session."""
+        start_time = time.perf_counter()
         # It is not yet possible to combine tool calling with requests for structured responses
         response_format = None
         if isinstance(chat, Chat):
             chat._fetch_file_handles(self._session._fetch_file_handle)
-        action_chat: Chat = Chat.from_history(chat)
+        agent_chat: Chat = Chat.from_history(chat)
         del chat
         # Multiple rounds, until all tool calls are resolved or limit is reached
         round_counter: Iterable[int]
@@ -1622,9 +1622,11 @@ def _wrapped_on_prompt_processing_progress(progress: float) -> None:
                 # Update the endpoint definition on each iteration in order to:
                 # * update the chat history with the previous round result
                 # * be able to disallow tool use when the rounds are limited
+                # TODO: Refactor endpoint API to avoid repeatedly performing the
+                #       LlmPredictionConfig -> KvConfigStack transformation
                 endpoint = ChatResponseEndpoint(
                     self.identifier,
-                    action_chat,
+                    agent_chat,
                     response_format,
                     config,
                     None,  # Multiple messages are generated per round
@@ -1658,23 +1660,29 @@ def _wrapped_on_prompt_processing_progress(progress: float) -> None:
                     tool_results = [
                         fut.result() for fut in as_completed(pending_tool_calls)
                     ]
-                    requests_message = action_chat._add_assistant_tool_requests(
+                    requests_message = agent_chat._add_assistant_tool_requests(
                         prediction, tool_call_requests
                     )
-                    results_message = action_chat._add_tool_results(tool_results)
+                    results_message = agent_chat._add_tool_results(tool_results)
                     if on_message is not None:
                         on_message(requests_message)
                         on_message(results_message)
                 elif on_message is not None:
-                    on_message(action_chat.add_assistant_response(prediction))
+                    on_message(agent_chat.add_assistant_response(prediction))
                 if on_round_end is not None:
                     on_round_end(round_index)
                 if not tool_call_requests:
                     # No tool call requests -> we're done here
                     break
+                if round_index == final_round_index:
+                    # We somehow received at least one tool call request,
+                    # even though tools are omitted on the final round
+                    err_msg = "Model requested tool use on final prediction round."
+                    endpoint._handle_invalid_tool_request(err_msg)
+                    break
         num_rounds = round_index + 1
-        duration = time.perf_counter() - action_start_time
-        return ActionResult(rounds=num_rounds, total_time_seconds=duration)
+        duration = time.perf_counter() - start_time
+        return ActResult(rounds=num_rounds, total_time_seconds=duration)
 
     @sdk_public_api()
     def apply_prompt_template(
diff --git a/tests/test_inference.py b/tests/test_inference.py
@@ -162,7 +162,7 @@ def test_duplicate_tool_names_rejected() -> None:
 
 
 @pytest.mark.lmstudio
-def test_tool_action(caplog: LogCap) -> None:
+def test_tool_using_agent(caplog: LogCap) -> None:
     # This is currently a sync-only API (it will be refactored after 1.0.0)
 
     caplog.set_level(logging.DEBUG)
@@ -177,9 +177,9 @@ def test_tool_action(caplog: LogCap) -> None:
         # Ensure ignoring the round index passes static type checks
         predictions: list[PredictionResult[str]] = []
 
-        action_result = llm.act(chat, tools, on_prediction_completed=predictions.append)
+        act_result = llm.act(chat, tools, on_prediction_completed=predictions.append)
         assert len(predictions) > 1
-        assert action_result.rounds == len(predictions)
+        assert act_result.rounds == len(predictions)
         assert "220" in predictions[-1].content
 
     for _logger_name, log_level, message in caplog.record_tuples:
@@ -194,7 +194,7 @@ def test_tool_action(caplog: LogCap) -> None:
 
 
 @pytest.mark.lmstudio
-def test_tool_action_callbacks(caplog: LogCap) -> None:
+def test_tool_using_agent_callbacks(caplog: LogCap) -> None:
     # This is currently a sync-only API (it will be refactored after 1.0.0)
 
     caplog.set_level(logging.DEBUG)
@@ -222,7 +222,7 @@ def _append_fragment(f: LlmPredictionFragment, round_index: int) -> None:
 
         # TODO: Also check on_prompt_processing_progress and handling invalid messages
         # (although it isn't clear how to provoke calls to the latter without mocking)
-        action_result = llm.act(
+        act_result = llm.act(
             chat,
             tools,
             on_first_token=first_tokens.append,
@@ -232,7 +232,7 @@ def _append_fragment(f: LlmPredictionFragment, round_index: int) -> None:
             on_round_end=round_ends.append,
             on_prediction_completed=predictions.append,
         )
-        num_rounds = action_result.rounds
+        num_rounds = act_result.rounds
         sequential_round_indices = list(range(num_rounds))
         assert num_rounds > 1
         assert [p.round_index for p in predictions] == sequential_round_indices