holmes/core/models.py (6 changes: 5 additions & 1 deletion)
@@ -68,7 +68,11 @@ def as_streaming_tool_result_response(self):
 
 def format_tool_result_data(tool_result: StructuredToolResult) -> str:
     tool_response = tool_result.data
-    if isinstance(tool_result.data, str):
+    if tool_result.llm_data:
+        # Some tools can return data dedicated to the LLM. This can be reformatted or summarized data
+        # These will end up in the conversation history.
+        tool_response = tool_result.llm_data
+    elif isinstance(tool_result.data, str):
         tool_response = tool_result.data
     else:
         try:
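For orientation, here is a minimal sketch of the precedence this hunk introduces: an LLM-dedicated rendering wins over the raw payload, plain strings pass through, and anything else falls back to serialization. The `FakeToolResult` stub and the JSON fallback in the `else` branch are simplified assumptions standing in for the real `StructuredToolResult` and the truncated tail of the function.

```python
import json
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class FakeToolResult:
    # Simplified stand-in for holmes.core.tools.StructuredToolResult;
    # only the two fields relevant to this hunk are modeled.
    data: Optional[Any] = None
    llm_data: Optional[str] = None  # optional LLM-oriented rendering of `data`


def format_tool_result_data(tool_result: FakeToolResult) -> str:
    # Prefer the LLM-dedicated rendering when a tool provides one.
    if tool_result.llm_data:
        return tool_result.llm_data
    # Otherwise fall back to the raw data, serializing non-strings.
    if isinstance(tool_result.data, str):
        return tool_result.data
    try:
        return json.dumps(tool_result.data, indent=2)
    except TypeError:
        return str(tool_result.data)


raw = {"series": [{"ts": 1, "value": 0.2}, {"ts": 2, "value": 0.4}]}
print(format_tool_result_data(FakeToolResult(data=raw)))  # JSON dump of the raw data
print(format_tool_result_data(FakeToolResult(data=raw, llm_data="2 samples, min=0.2 max=0.4")))  # summary wins
```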
holmes/core/tool_calling_llm.py (9 changes: 8 additions & 1 deletion)
@@ -679,7 +679,7 @@ def _get_tool_call_result(
 
         tool = self.tool_executor.get_tool_by_name(tool_name)
 
-        return ToolCallResult(
+        tool_call_result = ToolCallResult(
             tool_call_id=tool_call_id,
             tool_name=tool_name,
             description=str(tool.get_parameterized_one_liner(tool_params))
@@ -688,6 +688,12 @@ def _get_tool_call_result(
             result=tool_response,
         )
 
+        message = tool_call_result.as_tool_call_message()
+
+        token_count = self.llm.count_tokens_for_message(messages=[message])
+        tool_call_result.size = token_count
+        return tool_call_result
+
     @staticmethod
     def _log_tool_call_result(tool_span, tool_call_result: ToolCallResult):
         tool_span.set_attributes(name=tool_call_result.tool_name)
@@ -698,6 +704,7 @@ def _log_tool_call_result(tool_span, tool_call_result: ToolCallResult):
             metadata={
                 "status": tool_call_result.result.status,
                 "description": tool_call_result.description,
+                "token_count": tool_call_result.size,
             },
         )
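To make the intent concrete, a small sketch of the count-once-and-cache flow: the token count is computed when the result is built and reused later. The whitespace tokenizer and the trimmed-down result type are stand-ins for the real LLM-backed `count_tokens_for_message` and `ToolCallResult`.

```python
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class ToolCallResultSketch:
    # Trimmed stand-in for ToolCallResult; `size` caches the token count.
    tool_call_id: str
    tool_name: str
    content: str
    size: Optional[int] = None

    def as_tool_call_message(self) -> Dict[str, str]:
        return {"role": "tool", "tool_call_id": self.tool_call_id, "content": self.content}


def count_tokens_for_message(messages: List[Dict[str, str]]) -> int:
    # Naive whitespace count; the real implementation is model-specific.
    return sum(len(m["content"].split()) for m in messages)


def build_tool_call_result(tool_call_id: str, tool_name: str, content: str) -> ToolCallResultSketch:
    result = ToolCallResultSketch(tool_call_id, tool_name, content)
    # Count tokens once when the result is built so later consumers
    # (tracing metadata, the context-window limiter) can reuse result.size.
    result.size = count_tokens_for_message([result.as_tool_call_message()])
    return result


r = build_tool_call_result("call_1", "kubectl_logs", "error: connection refused " * 50)
print(r.size)  # cached token estimate, surfaced as token_count in the tool span metadata
```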
holmes/core/tools.py (1 change: 1 addition & 0 deletions)
@@ -81,6 +81,7 @@ class StructuredToolResult(BaseModel):
     error: Optional[str] = None
     return_code: Optional[int] = None
     data: Optional[Any] = None
+    llm_data: Optional[str] = None
     url: Optional[str] = None
     invocation: Optional[str] = None
     params: Optional[Dict] = None
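As a hypothetical usage of the new field: a tool can keep the full payload in `data` for users and the UI while attaching a compact rendering in `llm_data` for the conversation history. `summarize_rows` below is illustrative only and not part of the codebase.

```python
import json
from typing import Any, Dict, List


def summarize_rows(rows: List[Dict[str, Any]]) -> str:
    # Hypothetical helper: a one-line summary instead of the full table.
    return f"{len(rows)} rows; first={rows[0]}" if rows else "0 rows"


rows = [{"pod": f"api-{i}", "restarts": i % 3} for i in range(500)]

# Keyword arguments a tool might pass to StructuredToolResult(...):
result_kwargs = {
    "status": "success",
    "data": json.dumps(rows, indent=2),  # full fidelity, potentially very large
    "llm_data": summarize_rows(rows),    # compact view preferred by format_tool_result_data
}
print(result_kwargs["llm_data"])
```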
holmes/core/tools_utils/tool_context_window_limiter.py (15 changes: 8 additions & 7 deletions)
@@ -11,23 +11,24 @@ def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM)
         and 0 < TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
         and TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT <= 100
     ):
-        message = tool_call_result.as_tool_call_message()
-
-        messages_token = llm.count_tokens_for_message(messages=[message])
+        if not tool_call_result.size:
+            message = tool_call_result.as_tool_call_message()
+            messages_token = llm.count_tokens_for_message(messages=[message])
+            tool_call_result.size = messages_token
         context_window_size = llm.get_context_window_size()
         max_tokens_allowed: int = int(
             context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100
         )
 
-        if messages_token > max_tokens_allowed:
+        if tool_call_result.size > max_tokens_allowed:
             relative_pct = (
-                (messages_token - max_tokens_allowed) / messages_token
+                (tool_call_result.size - max_tokens_allowed) / tool_call_result.size
             ) * 100
-            error_message = f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
+            error_message = f"The tool call result is too large to return: {tool_call_result.size} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
             tool_call_result.result.status = StructuredToolResultStatus.ERROR
             tool_call_result.result.data = None
             tool_call_result.result.error = error_message
 
             sentry_helper.capture_toolcall_contains_too_many_tokens(
-                tool_call_result, messages_token, max_tokens_allowed
+                tool_call_result, tool_call_result.size, max_tokens_allowed
             )
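A worked example of the budgeting math above, using made-up numbers: a 128k-token context window, a 10% per-tool allowance, and a cached result size of 20,000 tokens.

```python
# Hypothetical numbers only; the real values come from the LLM's context
# window and the TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT setting.
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = 10
context_window_size = 128_000
result_size = 20_000  # tool_call_result.size, counted once in _get_tool_call_result

max_tokens_allowed = int(context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100)
print(max_tokens_allowed)  # 12800

if result_size > max_tokens_allowed:
    # How much smaller the allowance is, relative to the oversized result.
    relative_pct = ((result_size - max_tokens_allowed) / result_size) * 100
    print(f"{relative_pct:.1f}% smaller")  # 36.0% smaller
    # The real function then replaces the payload with an ERROR result that
    # asks the LLM to re-run the query with a narrower scope.
```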
holmes/plugins/toolsets/newrelic/newrelic.py (60 changes: 55 additions & 5 deletions)
@@ -10,6 +10,10 @@
 )
 from pydantic import BaseModel
 from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
+from holmes.plugins.toolsets.prometheus.data_compression import (
+    raw_metric_to_compressed_metric,
+    summarize_metrics,
+)
 from holmes.plugins.toolsets.prometheus.model import PromResponse
 from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
 from holmes.plugins.toolsets.newrelic.new_relic_api import NewRelicAPI
@@ -76,14 +80,49 @@ def __init__(self, toolset: "NewRelicToolset"):
         )
         self._toolset = toolset
 
-    def format_metrics(
+    def compress_metrics_data(self, response: PromResponse) -> Optional[str]:
+        llm_data: Optional[str] = None
+        try:
+            if self._toolset.config and self._toolset.compress_metrics:
+                metrics = [
+                    raw_metric_to_compressed_metric(metric, remove_labels=set())
+                    for metric in response.data.result
+                ]
+
+                compressed_data = summarize_metrics(metrics)
+                original_size = len(json.dumps(response.to_json()))
+                compressed_size = len(json.dumps(compressed_data))
+                compression_ratio = (
+                    (1 - compressed_size / original_size) * 100
+                    if original_size > 0
+                    else 0
+                )
+
+                if compression_ratio > self._toolset.compress_metrics_minimum_ratio:
+                    # below this amount it's likely not worth mutating the response
+                    llm_data = compressed_data
+                    logging.info(
+                        f"Compressed Newrelic metrics: {original_size:,} → {compressed_size:,} chars "
+                        f"({compression_ratio:.1f}% reduction)"
+                    )
+                else:
+                    logging.info(
+                        f"Compressed Newrelic metrics: {original_size:,} → {compressed_size:,} chars "
+                        f"({compression_ratio:.1f}% reduction). Original data will be used instead."
+                    )
+        except Exception:
+            logging.warning("Failed to compress newrelic data", exc_info=True)
+
+        return llm_data
+
+    def to_new_relic_records(
         self,
         records: List[Dict[str, Any]],
         params: Optional[Dict[str, Any]] = None,
         begin_key: str = "beginTimeSeconds",
         end_key: str = "endTimeSeconds",
         facet_key: str = "facet",
-    ) -> Dict[str, Any]:
+    ) -> PromResponse:
         resp = PromResponse.from_newrelic_records(
             records=records,
             tool_name=self.name,
@@ -92,7 +131,7 @@ def format_metrics(
             end_key=end_key,
             facet_key=facet_key,
         )
-        return resp.to_json()
+        return resp
 
     def _invoke(
         self, params: dict, user_approved: bool = False
@@ -108,7 +147,6 @@
 
         query = params["query"]
         result: List[Dict[str, Any]] = api.execute_nrql_query(query)
-
        qtype = params.get("query_type", "").lower()
 
         if qtype == "traces":
@@ -137,12 +175,16 @@
         if qtype == "metrics" or "timeseries" in query.lower():
             enriched_params = dict(params)
             enriched_params["query"] = query
-            return_result = self.format_metrics(result, params=enriched_params)
+            prom_data = self.to_new_relic_records(result, params=enriched_params)
+
+            return_result = prom_data.to_json()
+            print(json.dumps(return_result, indent=2))
             if len(return_result.get("data", {}).get("results", [])):
                 return_result = result  # type: ignore[assignment]
             return StructuredToolResult(
                 status=StructuredToolResultStatus.SUCCESS,
                 data=json.dumps(return_result, indent=2),
+                llm_data=self.compress_metrics_data(prom_data),
                 params=params,
             )
 
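To illustrate the gate applied in `compress_metrics_data`: compress, measure the size reduction, and only attach an LLM-facing summary when it clears the configured minimum ratio. The `summarize` helper below is a stand-in for the prometheus `data_compression` utilities, not their actual implementation.

```python
import json


def summarize(series):
    # Illustrative stand-in: keep labels plus count/min/max per series
    # instead of every individual sample.
    return [
        {
            "labels": s["labels"],
            "count": len(s["values"]),
            "min": min(s["values"]),
            "max": max(s["values"]),
        }
        for s in series
    ]


def maybe_compress(series, minimum_ratio: int = 30):
    original_size = len(json.dumps(series))
    compressed = json.dumps(summarize(series))
    ratio = (1 - len(compressed) / original_size) * 100 if original_size > 0 else 0
    # Below the threshold, the saving is not worth diverging from the raw data,
    # so no llm_data is attached and the full payload is sent as usual.
    return compressed if ratio > minimum_ratio else None


series = [{"labels": {"host": "web-1"}, "values": [round(0.1 * i, 2) for i in range(200)]}]
print(maybe_compress(series) is not None)  # True: the summary is far smaller than 200 samples
```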
@@ -205,12 +247,16 @@ class NewrelicConfig(BaseModel):
     nr_api_key: Optional[str] = None
     nr_account_id: Optional[str] = None
     is_eu_datacenter: Optional[bool] = False
+    compress_metrics: bool = True
+    compress_metrics_minimum_ratio: int = 30  # 20 means 20% size reduction
 
 
 class NewRelicToolset(Toolset):
     nr_api_key: Optional[str] = None
     nr_account_id: Optional[str] = None
     is_eu_datacenter: bool = False
+    compress_metrics: bool = True
+    compress_metrics_minimum_ratio: int = 30
 
     def __init__(self):
         super().__init__(
@@ -241,6 +287,10 @@ def prerequisites_callable(
             self.nr_account_id = nr_config.nr_account_id
             self.nr_api_key = nr_config.nr_api_key
             self.is_eu_datacenter = nr_config.is_eu_datacenter or False
+            self.compress_metrics = nr_config.compress_metrics or True
+            self.compress_metrics_minimum_ratio = (
+                nr_config.compress_metrics_minimum_ratio or 30
+            )
 
             if not self.nr_account_id or not self.nr_api_key:
                 return False, "New Relic account ID or API key is missing"
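Finally, a minimal sketch of the two new configuration knobs, assuming the field names and defaults shown in the diff; requires pydantic. Raising the minimum ratio makes the toolset more conservative about swapping in the compressed view.

```python
from typing import Optional
from pydantic import BaseModel


class NewrelicConfigSketch(BaseModel):
    # Mirrors the fields shown in the hunk above; everything else about the
    # real NewrelicConfig class is omitted.
    nr_api_key: Optional[str] = None
    nr_account_id: Optional[str] = None
    is_eu_datacenter: Optional[bool] = False
    compress_metrics: bool = True
    compress_metrics_minimum_ratio: int = 30  # percent reduction required before llm_data is used


# e.g. values loaded from the toolset's YAML config block
cfg = NewrelicConfigSketch(
    nr_api_key="example-key",
    nr_account_id="1234567",
    compress_metrics_minimum_ratio=50,
)
print(cfg.compress_metrics, cfg.compress_metrics_minimum_ratio)  # True 50
```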