holmes/core/models.py (6 changes: 5 additions & 1 deletion)
@@ -68,7 +68,11 @@ def as_streaming_tool_result_response(self):
 
 def format_tool_result_data(tool_result: StructuredToolResult) -> str:
     tool_response = tool_result.data
-    if isinstance(tool_result.data, str):
+    if tool_result.llm_data:
+        # Some tools can return data dedicated to the LLM. This can be reformatted or summarized data
+        # These will end up in the conversation history.
+        tool_response = tool_result.llm_data
+    elif isinstance(tool_result.data, str):
         tool_response = tool_result.data
     else:
         try:
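For orientation, here is a minimal sketch of the precedence this hunk introduces: an LLM-dedicated rendering wins over the raw payload, plain strings pass through, and anything else falls back to serialization. The `FakeToolResult` stub and the JSON fallback in the `else` branch are simplified assumptions standing in for the real `StructuredToolResult` and the truncated tail of the function.

```python
import json
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class FakeToolResult:
    # Simplified stand-in for holmes.core.tools.StructuredToolResult;
    # only the two fields relevant to this hunk are modeled.
    data: Optional[Any] = None
    llm_data: Optional[str] = None  # optional LLM-oriented rendering of `data`


def format_tool_result_data(tool_result: FakeToolResult) -> str:
    # Prefer the LLM-dedicated rendering when a tool provides one.
    if tool_result.llm_data:
        return tool_result.llm_data
    # Otherwise fall back to the raw data, serializing non-strings.
    if isinstance(tool_result.data, str):
        return tool_result.data
    try:
        return json.dumps(tool_result.data, indent=2)
    except TypeError:
        return str(tool_result.data)


raw = {"series": [{"ts": 1, "value": 0.2}, {"ts": 2, "value": 0.4}]}
print(format_tool_result_data(FakeToolResult(data=raw)))  # JSON dump of the raw data
print(format_tool_result_data(FakeToolResult(data=raw, llm_data="2 samples, min=0.2 max=0.4")))  # summary wins
```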
holmes/core/tool_calling_llm.py (9 changes: 8 additions & 1 deletion)
@@ -679,7 +679,7 @@ def _get_tool_call_result(
 
         tool = self.tool_executor.get_tool_by_name(tool_name)
 
-        return ToolCallResult(
+        tool_call_result = ToolCallResult(
             tool_call_id=tool_call_id,
             tool_name=tool_name,
             description=str(tool.get_parameterized_one_liner(tool_params))
@@ -688,6 +688,12 @@ def _get_tool_call_result(
             result=tool_response,
         )
 
+        message = tool_call_result.as_tool_call_message()
+
+        token_count = self.llm.count_tokens_for_message(messages=[message])
+        tool_call_result.size = token_count
+        return tool_call_result
+
     @staticmethod
     def _log_tool_call_result(tool_span, tool_call_result: ToolCallResult):
         tool_span.set_attributes(name=tool_call_result.tool_name)
@@ -698,6 +704,7 @@ def _log_tool_call_result(tool_span, tool_call_result: ToolCallResult):
             metadata={
                 "status": tool_call_result.result.status,
                 "description": tool_call_result.description,
+                "token_count": tool_call_result.size,
             },
         )
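To make the intent concrete, a small sketch of the count-once-and-cache flow: the token count is computed when the result is built and reused later. The whitespace tokenizer and the trimmed-down result type are stand-ins for the real LLM-backed `count_tokens_for_message` and `ToolCallResult`.

```python
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class ToolCallResultSketch:
    # Trimmed stand-in for ToolCallResult; `size` caches the token count.
    tool_call_id: str
    tool_name: str
    content: str
    size: Optional[int] = None

    def as_tool_call_message(self) -> Dict[str, str]:
        return {"role": "tool", "tool_call_id": self.tool_call_id, "content": self.content}


def count_tokens_for_message(messages: List[Dict[str, str]]) -> int:
    # Naive whitespace count; the real implementation is model-specific.
    return sum(len(m["content"].split()) for m in messages)


def build_tool_call_result(tool_call_id: str, tool_name: str, content: str) -> ToolCallResultSketch:
    result = ToolCallResultSketch(tool_call_id, tool_name, content)
    # Count tokens once when the result is built so later consumers
    # (tracing metadata, the context-window limiter) can reuse result.size.
    result.size = count_tokens_for_message([result.as_tool_call_message()])
    return result


r = build_tool_call_result("call_1", "kubectl_logs", "error: connection refused " * 50)
print(r.size)  # cached token estimate, surfaced as token_count in the tool span metadata
```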
holmes/core/tools.py (1 change: 1 addition & 0 deletions)
@@ -81,6 +81,7 @@ class StructuredToolResult(BaseModel):
     error: Optional[str] = None
     return_code: Optional[int] = None
     data: Optional[Any] = None
+    llm_data: Optional[str] = None
     url: Optional[str] = None
     invocation: Optional[str] = None
     params: Optional[Dict] = None
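As a hypothetical usage of the new field: a tool can keep the full payload in `data` for users and the UI while attaching a compact rendering in `llm_data` for the conversation history. `summarize_rows` below is illustrative only and not part of the codebase.

```python
import json
from typing import Any, Dict, List


def summarize_rows(rows: List[Dict[str, Any]]) -> str:
    # Hypothetical helper: a one-line summary instead of the full table.
    return f"{len(rows)} rows; first={rows[0]}" if rows else "0 rows"


rows = [{"pod": f"api-{i}", "restarts": i % 3} for i in range(500)]

# Keyword arguments a tool might pass to StructuredToolResult(...):
result_kwargs = {
    "status": "success",
    "data": json.dumps(rows, indent=2),  # full fidelity, potentially very large
    "llm_data": summarize_rows(rows),    # compact view preferred by format_tool_result_data
}
print(result_kwargs["llm_data"])
```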
holmes/core/tools_utils/tool_context_window_limiter.py (15 changes: 8 additions & 7 deletions)
@@ -11,23 +11,24 @@ def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM)
         and 0 < TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
         and TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT <= 100
     ):
-        message = tool_call_result.as_tool_call_message()
-
-        messages_token = llm.count_tokens_for_message(messages=[message])
+        if not tool_call_result.size:
+            message = tool_call_result.as_tool_call_message()
+            messages_token = llm.count_tokens_for_message(messages=[message])
+            tool_call_result.size = messages_token
         context_window_size = llm.get_context_window_size()
         max_tokens_allowed: int = int(
             context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100
         )
 
-        if messages_token > max_tokens_allowed:
+        if tool_call_result.size > max_tokens_allowed:
             relative_pct = (
-                (messages_token - max_tokens_allowed) / messages_token
+                (tool_call_result.size - max_tokens_allowed) / tool_call_result.size
             ) * 100
-            error_message = f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
+            error_message = f"The tool call result is too large to return: {tool_call_result.size} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
             tool_call_result.result.status = StructuredToolResultStatus.ERROR
             tool_call_result.result.data = None
             tool_call_result.result.error = error_message
 
             sentry_helper.capture_toolcall_contains_too_many_tokens(
-                tool_call_result, messages_token, max_tokens_allowed
+                tool_call_result, tool_call_result.size, max_tokens_allowed
             )
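A worked example of the budgeting math above, using made-up numbers: a 128k-token context window, a 10% per-tool allowance, and a cached result size of 20,000 tokens.

```python
# Hypothetical numbers only; the real values come from the LLM's context
# window and the TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT setting.
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = 10
context_window_size = 128_000
result_size = 20_000  # tool_call_result.size, counted once in _get_tool_call_result

max_tokens_allowed = int(context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100)
print(max_tokens_allowed)  # 12800

if result_size > max_tokens_allowed:
    # How much smaller the allowance is, relative to the oversized result.
    relative_pct = ((result_size - max_tokens_allowed) / result_size) * 100
    print(f"{relative_pct:.1f}% smaller")  # 36.0% smaller
    # The real function then replaces the payload with an ERROR result that
    # asks the LLM to re-run the query with a narrower scope.
```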
holmes/plugins/toolsets/newrelic/newrelic.py (60 changes: 55 additions & 5 deletions)
@@ -10,6 +10,10 @@
 )
 from pydantic import BaseModel
 from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
+from holmes.plugins.toolsets.prometheus.data_compression import (
+    raw_metric_to_compressed_metric,
+    summarize_metrics,
+)
 from holmes.plugins.toolsets.prometheus.model import PromResponse
 from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
 from holmes.plugins.toolsets.newrelic.new_relic_api import NewRelicAPI
@@ -76,14 +80,49 @@ def __init__(self, toolset: "NewRelicToolset"):
         )
         self._toolset = toolset
 
-    def format_metrics(
+    def compress_metrics_data(self, response: PromResponse) -> Optional[str]:
+        llm_data: Optional[str] = None
+        try:
+            if self._toolset.config and self._toolset.compress_metrics:
+                metrics = [
+                    raw_metric_to_compressed_metric(metric, remove_labels=set())
+                    for metric in response.data.result
+                ]
+
+                compressed_data = summarize_metrics(metrics)
+                original_size = len(json.dumps(response.to_json()))
+                compressed_size = len(json.dumps(compressed_data))
+                compression_ratio = (
+                    (1 - compressed_size / original_size) * 100
+                    if original_size > 0
+                    else 0
+                )
+
+                if compression_ratio > self._toolset.compress_metrics_minimum_ratio:
+                    # below this amount it's likely not worth mutating the response
+                    llm_data = compressed_data
+                    logging.info(
+                        f"Compressed Newrelic metrics: {original_size:,} → {compressed_size:,} chars "
+                        f"({compression_ratio:.1f}% reduction)"
+                    )
+                else:
+                    logging.info(
+                        f"Compressed Newrelic metrics: {original_size:,} → {compressed_size:,} chars "
+                        f"({compression_ratio:.1f}% reduction). Original data will be used instead."
+                    )
+        except Exception:
+            logging.warning("Failed to compress newrelic data", exc_info=True)
+
+        return llm_data
+
+    def to_new_relic_records(
         self,
         records: List[Dict[str, Any]],
         params: Optional[Dict[str, Any]] = None,
         begin_key: str = "beginTimeSeconds",
         end_key: str = "endTimeSeconds",
         facet_key: str = "facet",
-    ) -> Dict[str, Any]:
+    ) -> PromResponse:
         resp = PromResponse.from_newrelic_records(
             records=records,
             tool_name=self.name,
@@ -92,7 +131,7 @@ def format_metrics(
             end_key=end_key,
             facet_key=facet_key,
         )
-        return resp.to_json()
+        return resp
 
     def _invoke(
         self, params: dict, user_approved: bool = False
@@ -108,7 +147,6 @@
 
         query = params["query"]
         result: List[Dict[str, Any]] = api.execute_nrql_query(query)
-
        qtype = params.get("query_type", "").lower()
 
         if qtype == "traces":
@@ -137,12 +175,16 @@
         if qtype == "metrics" or "timeseries" in query.lower():
             enriched_params = dict(params)
             enriched_params["query"] = query
-            return_result = self.format_metrics(result, params=enriched_params)
+            prom_data = self.to_new_relic_records(result, params=enriched_params)
+
+            return_result = prom_data.to_json()
+            print(json.dumps(return_result, indent=2))
             if len(return_result.get("data", {}).get("results", [])):
                 return_result = result  # type: ignore[assignment]
             return StructuredToolResult(
                 status=StructuredToolResultStatus.SUCCESS,
                 data=json.dumps(return_result, indent=2),
+                llm_data=self.compress_metrics_data(prom_data),
                 params=params,
             )
 
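To illustrate the gate applied in `compress_metrics_data`: compress, measure the size reduction, and only attach an LLM-facing summary when it clears the configured minimum ratio. The `summarize` helper below is a stand-in for the prometheus `data_compression` utilities, not their actual implementation.

```python
import json


def summarize(series):
    # Illustrative stand-in: keep labels plus count/min/max per series
    # instead of every individual sample.
    return [
        {
            "labels": s["labels"],
            "count": len(s["values"]),
            "min": min(s["values"]),
            "max": max(s["values"]),
        }
        for s in series
    ]


def maybe_compress(series, minimum_ratio: int = 30):
    original_size = len(json.dumps(series))
    compressed = json.dumps(summarize(series))
    ratio = (1 - len(compressed) / original_size) * 100 if original_size > 0 else 0
    # Below the threshold, the saving is not worth diverging from the raw data,
    # so no llm_data is attached and the full payload is sent as usual.
    return compressed if ratio > minimum_ratio else None


series = [{"labels": {"host": "web-1"}, "values": [round(0.1 * i, 2) for i in range(200)]}]
print(maybe_compress(series) is not None)  # True: the summary is far smaller than 200 samples
```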
@@ -205,12 +247,16 @@ class NewrelicConfig(BaseModel):
     nr_api_key: Optional[str] = None
     nr_account_id: Optional[str] = None
     is_eu_datacenter: Optional[bool] = False
+    compress_metrics: bool = True
+    compress_metrics_minimum_ratio: int = 30  # 20 means 20% size reduction
 
 
 class NewRelicToolset(Toolset):
     nr_api_key: Optional[str] = None
     nr_account_id: Optional[str] = None
     is_eu_datacenter: bool = False
+    compress_metrics: bool = True
+    compress_metrics_minimum_ratio: int = 30
 
     def __init__(self):
         super().__init__(
@@ -241,6 +287,10 @@ def prerequisites_callable(
             self.nr_account_id = nr_config.nr_account_id
             self.nr_api_key = nr_config.nr_api_key
             self.is_eu_datacenter = nr_config.is_eu_datacenter or False
+            self.compress_metrics = nr_config.compress_metrics or True
+            self.compress_metrics_minimum_ratio = (
+                nr_config.compress_metrics_minimum_ratio or 30
+            )
 
             if not self.nr_account_id or not self.nr_api_key:
                 return False, "New Relic account ID or API key is missing"
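Finally, a minimal sketch of the two new configuration knobs, assuming the field names and defaults shown in the diff; requires pydantic. Raising the minimum ratio makes the toolset more conservative about swapping in the compressed view.

```python
from typing import Optional
from pydantic import BaseModel


class NewrelicConfigSketch(BaseModel):
    # Mirrors the fields shown in the hunk above; everything else about the
    # real NewrelicConfig class is omitted.
    nr_api_key: Optional[str] = None
    nr_account_id: Optional[str] = None
    is_eu_datacenter: Optional[bool] = False
    compress_metrics: bool = True
    compress_metrics_minimum_ratio: int = 30  # percent reduction required before llm_data is used


# e.g. values loaded from the toolset's YAML config block
cfg = NewrelicConfigSketch(
    nr_api_key="example-key",
    nr_account_id="1234567",
    compress_metrics_minimum_ratio=50,
)
print(cfg.compress_metrics, cfg.compress_metrics_minimum_ratio)  # True 50
```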