16 changes: 8 additions & 8 deletions src/lmstudio/json_api.py
@@ -123,6 +123,7 @@
# explicitly via `lmstudio.json_api`, it isn't exported
# implicitly as part of the top-level `lmstudio` API.
__all__ = [
"ActResult",
"AnyModelSpecifier",
"EmbeddingModelInfo",
"EmbeddingModelInstanceInfo",
@@ -151,7 +152,6 @@
"ModelSpecifierDict",
"ModelQuery",
"ModelQueryDict",
"OperationResult",
"PredictionResult",
"PredictionRoundResult",
"SerializedLMSExtendedError",
Expand Down Expand Up @@ -455,9 +455,9 @@ def _to_history_content(self) -> str:

@dataclass(kw_only=True, frozen=True, slots=True)
class PredictionRoundResult(PredictionResult[str]):
"""The result of a prediction within a multi-round tool using operation."""
"""The result of a prediction within a multi-round tool using action."""

-round_index: int # The round within the operation that produced this result
+round_index: int # The round within the action that produced this result

@classmethod
def from_result(cls, result: PredictionResult[str], round_index: int) -> Self:
@@ -471,10 +471,10 @@ def from_result(cls, result: PredictionResult[str], round_index: int) -> Self:


@dataclass(kw_only=True, frozen=True, slots=True)
-class OperationResult:
-"""Summary of a completed multi-round tool using operation."""
+class ActResult:
+"""Summary of a completed multi-round tool using action."""

-# Actual operation output is reported via callbacks
+# Detailed action results are reported via callbacks (for now)

# fmt: off
rounds: int
Expand Down Expand Up @@ -1073,7 +1073,7 @@ def __init__(
on_first_token: Callable[[], None] | None = None,
on_prediction_fragment: Callable[[LlmPredictionFragment], None] | None = None,
on_prompt_processing_progress: Callable[[float], None] | None = None,
-# The remaining options are only relevant for multi-round tool operations
+# The remaining options are only relevant for multi-round tool actions
handle_invalid_tool_request: Callable[
[LMStudioPredictionError, _ToolCallRequest | None], str
]
Expand Down Expand Up @@ -1359,7 +1359,7 @@ def parse_tools(
"""Split tool function definitions into server and client details."""
if not tools:
raise LMStudioValueError(
"Tool operation requires at least one tool to be defined."
"Tool using actions require at least one tool to be defined."
)
llm_tool_defs: list[LlmTool] = []
client_tool_map: dict[str, ClientToolSpec] = {}
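Review note on the json_api.py changes: the OperationResult → ActResult rename is purely nominal on the consumer side, and the summary type keeps the two fields this diff constructs it with in sync_api.py. A minimal sketch of reading it — the literal values are placeholders, and importing it via `lmstudio.json_api` is assumed:

```python
# Sketch only: `rounds` and `total_time_seconds` are the fields this diff
# passes to ActResult(...) in sync_api.py; the values here are placeholders.
from lmstudio.json_api import ActResult

summary = ActResult(rounds=3, total_time_seconds=4.2)
print(f"{summary.rounds} rounds in {summary.total_time_seconds:.1f}s")
```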
32 changes: 20 additions & 12 deletions src/lmstudio/sync_api.py
@@ -53,6 +53,7 @@
_ToolCallRequest,
)
from .json_api import (
+ActResult,
AnyModelSpecifier,
AvailableModelBase,
ChannelEndpoint,
@@ -85,7 +86,6 @@
ModelSessionTypes,
ModelTypesEmbedding,
ModelTypesLlm,
-OperationResult,
PredictionEndpoint,
PredictionFragmentEvent,
PredictionResult,
@@ -1539,7 +1539,7 @@ def respond(
# Multi-round predictions are currently a sync-only handle-only feature
# TODO: Refactor to allow for more code sharing with the async API
@sdk_public_api()
-def operate(
+def act(
self,
chat: Chat | ChatHistoryDataDict | str,
tools: Iterable[ToolFunctionDef | ToolFunctionDefDict],
@@ -1559,14 +1559,14 @@ def operate(
[LMStudioPredictionError, _ToolCallRequest | None], str
]
| None = None,
-) -> OperationResult:
-"""Request a response (with implicit tool use) in an ongoing assistant chat session."""
-operation_start_time = time.perf_counter()
+) -> ActResult:
+"""Request a response (with implicit tool use) in an ongoing agent chat session."""
+start_time = time.perf_counter()
# It is not yet possible to combine tool calling with requests for structured responses
response_format = None
if isinstance(chat, Chat):
chat._fetch_file_handles(self._session._fetch_file_handle)
-op_chat: Chat = Chat.from_history(chat)
+agent_chat: Chat = Chat.from_history(chat)
del chat
# Multiple rounds, until all tool calls are resolved or limit is reached
round_counter: Iterable[int]
@@ -1622,9 +1622,11 @@ def _wrapped_on_prompt_processing_progress(progress: float) -> None:
# Update the endpoint definition on each iteration in order to:
# * update the chat history with the previous round result
# * be able to disallow tool use when the rounds are limited
+# TODO: Refactor endpoint API to avoid repeatedly performing the
+# LlmPredictionConfig -> KvConfigStack transformation
endpoint = ChatResponseEndpoint(
self.identifier,
-op_chat,
+agent_chat,
response_format,
config,
None, # Multiple messages are generated per round
@@ -1658,23 +1660,29 @@ def _wrapped_on_prompt_processing_progress(progress: float) -> None:
tool_results = [
fut.result() for fut in as_completed(pending_tool_calls)
]
-requests_message = op_chat._add_assistant_tool_requests(
+requests_message = agent_chat._add_assistant_tool_requests(
prediction, tool_call_requests
)
-results_message = op_chat._add_tool_results(tool_results)
+results_message = agent_chat._add_tool_results(tool_results)
if on_message is not None:
on_message(requests_message)
on_message(results_message)
elif on_message is not None:
-on_message(op_chat.add_assistant_response(prediction))
+on_message(agent_chat.add_assistant_response(prediction))
if on_round_end is not None:
on_round_end(round_index)
if not tool_call_requests:
# No tool call requests -> we're done here
break
+if round_index == final_round_index:
+# We somehow received at least one tool call request,
+# even though tools are omitted on the final round
+err_msg = "Model requested tool use on final prediction round."
+endpoint._handle_invalid_tool_request(err_msg)
+break
num_rounds = round_index + 1
-duration = time.perf_counter() - operation_start_time
-return OperationResult(rounds=num_rounds, total_time_seconds=duration)
+duration = time.perf_counter() - start_time
+return ActResult(rounds=num_rounds, total_time_seconds=duration)

@sdk_public_api()
def apply_prompt_template(
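Review note on the sync_api.py changes: for anyone tracking the rename downstream, this is roughly how the new `act()` entry point reads from user code. A sketch only — the `ToolFunctionDef` field names, the top-level `lms.llm`/`Chat` helpers, and the model key are assumptions, not part of this diff; only `act()`, the `on_prediction_completed` callback, and the `ActResult` fields come from the hunks above and the tests below.

```python
import lmstudio as lms
from lmstudio import Chat, ToolFunctionDef

def add(a: int, b: int) -> int:
    """Add two integers together."""
    return a + b

# Explicit tool definition form matching the Iterable[ToolFunctionDef | ...]
# parameter in the act() signature; the field names are an assumption here.
add_tool = ToolFunctionDef(
    name="add",
    description="Add two integers together",
    parameters={"a": int, "b": int},
    implementation=add,
)

chat = Chat("You are a task-focused assistant with tool access.")
chat.add_user_message("What is 120 + 100? Use the add tool to check.")

llm = lms.llm("qwen2.5-7b-instruct")  # placeholder model key
result = llm.act(
    chat,
    [add_tool],
    # Each completed round is reported as a PredictionRoundResult
    on_prediction_completed=lambda p: print(f"round {p.round_index}: {p.content}"),
)
print(f"Done in {result.rounds} rounds ({result.total_time_seconds:.2f}s)")
```

As the updated comment in json_api.py notes, detailed per-round output is reported via the callbacks; the returned ActResult only summarises the run.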
12 changes: 6 additions & 6 deletions tests/test_inference.py
@@ -162,7 +162,7 @@ def test_duplicate_tool_names_rejected() -> None:


@pytest.mark.lmstudio
-def test_tool_operation(caplog: LogCap) -> None:
+def test_tool_using_agent(caplog: LogCap) -> None:
# This is currently a sync-only API (it will be refactored after 1.0.0)

caplog.set_level(logging.DEBUG)
@@ -177,9 +177,9 @@ def test_tool_operation(caplog: LogCap) -> None:
# Ensure ignoring the round index passes static type checks
predictions: list[PredictionResult[str]] = []

-op_result = llm.operate(chat, tools, on_prediction_completed=predictions.append)
+act_result = llm.act(chat, tools, on_prediction_completed=predictions.append)
assert len(predictions) > 1
-assert op_result.rounds == len(predictions)
+assert act_result.rounds == len(predictions)
assert "220" in predictions[-1].content

for _logger_name, log_level, message in caplog.record_tuples:
@@ -194,7 +194,7 @@


@pytest.mark.lmstudio
-def test_tool_operation_callbacks(caplog: LogCap) -> None:
+def test_tool_using_agent_callbacks(caplog: LogCap) -> None:
# This is currently a sync-only API (it will be refactored after 1.0.0)

caplog.set_level(logging.DEBUG)
@@ -222,7 +222,7 @@ def _append_fragment(f: LlmPredictionFragment, round_index: int) -> None:

# TODO: Also check on_prompt_processing_progress and handling invalid messages
# (although it isn't clear how to provoke calls to the latter without mocking)
-op_result = llm.operate(
+act_result = llm.act(
chat,
tools,
on_first_token=first_tokens.append,
@@ -232,7 +232,7 @@ def _append_fragment(f: LlmPredictionFragment, round_index: int) -> None:
on_round_end=round_ends.append,
on_prediction_completed=predictions.append,
)
-num_rounds = op_result.rounds
+num_rounds = act_result.rounds
sequential_round_indices = list(range(num_rounds))
assert num_rounds > 1
assert [p.round_index for p in predictions] == sequential_round_indices
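Review note on the test changes: `test_tool_using_agent_callbacks` exercises the round-aware fragment callback. A compact way to collect the streamed text per round looks like the sketch below — the two-argument signature mirrors `_append_fragment` in the test, while the top-level import and the `content` attribute on `LlmPredictionFragment` are assumptions.

```python
from collections import defaultdict

from lmstudio import LlmPredictionFragment

# One list of streamed text chunks per prediction round.
fragments_by_round: dict[int, list[str]] = defaultdict(list)

def collect_fragment(fragment: LlmPredictionFragment, round_index: int) -> None:
    # `content` is assumed to hold this fragment's generated text
    fragments_by_round[round_index].append(fragment.content)

# Passed to act() as: on_prediction_fragment=collect_fragment
```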