Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion holmes/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ def as_streaming_tool_result_response(self):

def format_tool_result_data(tool_result: StructuredToolResult) -> str:
tool_response = tool_result.data
if isinstance(tool_result.data, str):
if tool_result.llm_data:
tool_response = tool_result.llm_data
elif isinstance(tool_result.data, str):
tool_response = tool_result.data
else:
try:
Expand Down
9 changes: 8 additions & 1 deletion holmes/core/tool_calling_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ def _get_tool_call_result(

tool = self.tool_executor.get_tool_by_name(tool_name)

return ToolCallResult(
tool_call_result = ToolCallResult(
tool_call_id=tool_call_id,
tool_name=tool_name,
description=str(tool.get_parameterized_one_liner(tool_params))
Expand All @@ -688,6 +688,12 @@ def _get_tool_call_result(
result=tool_response,
)

message = tool_call_result.as_tool_call_message()

token_count = self.llm.count_tokens_for_message(messages=[message])
tool_call_result.size = token_count
return tool_call_result

@staticmethod
def _log_tool_call_result(tool_span, tool_call_result: ToolCallResult):
tool_span.set_attributes(name=tool_call_result.tool_name)
Expand All @@ -698,6 +704,7 @@ def _log_tool_call_result(tool_span, tool_call_result: ToolCallResult):
metadata={
"status": tool_call_result.result.status,
"description": tool_call_result.description,
"token_count": tool_call_result.size,
},
)

Expand Down
1 change: 1 addition & 0 deletions holmes/core/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class StructuredToolResult(BaseModel):
error: Optional[str] = None
return_code: Optional[int] = None
data: Optional[Any] = None
llm_data: Optional[str] = None
url: Optional[str] = None
invocation: Optional[str] = None
params: Optional[Dict] = None
Expand Down
15 changes: 8 additions & 7 deletions holmes/core/tools_utils/tool_context_window_limiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,24 @@ def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM)
and 0 < TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
and TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT <= 100
):
message = tool_call_result.as_tool_call_message()

messages_token = llm.count_tokens_for_message(messages=[message])
if not tool_call_result.size:
message = tool_call_result.as_tool_call_message()
messages_token = llm.count_tokens_for_message(messages=[message])
tool_call_result.size = messages_token
context_window_size = llm.get_context_window_size()
max_tokens_allowed: int = int(
context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100
)

if messages_token > max_tokens_allowed:
if tool_call_result.size > max_tokens_allowed:
relative_pct = (
(messages_token - max_tokens_allowed) / messages_token
(tool_call_result.size - max_tokens_allowed) / tool_call_result.size
) * 100
error_message = f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
error_message = f"The tool call result is too large to return: {tool_call_result.size} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
tool_call_result.result.status = StructuredToolResultStatus.ERROR
tool_call_result.result.data = None
tool_call_result.result.error = error_message

sentry_helper.capture_toolcall_contains_too_many_tokens(
tool_call_result, messages_token, max_tokens_allowed
tool_call_result, tool_call_result.size, max_tokens_allowed
)
Loading
Loading