Commits (24)
680905d
make dal function to support issues and config changes
RoiGlinik Oct 21, 2025
d24dc10
add new tool for fetching issues only
RoiGlinik Oct 21, 2025
59246ad
add instructions for the new tool
RoiGlinik Oct 21, 2025
fb0b26b
fix typos
RoiGlinik Oct 21, 2025
9d9fddb
Merge branch 'master' into ROB-2290-workload-issues-tool
RoiGlinik Oct 22, 2025
39abd10
minor fixes. improve description. remove fixed external cluster
RoiGlinik Oct 22, 2025
dc6e904
fix copied name
RoiGlinik Oct 22, 2025
9bd38b3
remove finding type ALL for now
RoiGlinik Oct 22, 2025
8588c5a
remove some tools from test to avoid tools limit
RoiGlinik Oct 22, 2025
dd7e272
Merge branch 'master' into ROB-2290-workload-issues-tool
RoiGlinik Oct 23, 2025
1ccc79e
load mock dal for tests
RoiGlinik Oct 23, 2025
7bafa50
load issues json file as issues table
RoiGlinik Oct 23, 2025
c671cb3
mock issues function
RoiGlinik Oct 23, 2025
9595736
fix naming for inheritance
RoiGlinik Oct 23, 2025
5ccc0fc
create base test case for loading issues from history
RoiGlinik Oct 23, 2025
ace628d
Merge branch 'master' into ROB-2290-workload-issues-testing
RoiGlinik Oct 23, 2025
45ff238
allow mock dal to function without db connection
RoiGlinik Oct 23, 2025
322b1eb
move params to constructor to use required
RoiGlinik Oct 23, 2025
7e3def3
Merge branch 'master' into ROB-2290-workload-issues-testing
RoiGlinik Oct 26, 2025
b7c4677
fix description
RoiGlinik Oct 26, 2025
d7c338c
Merge branch 'master' into ROB-2290-workload-issues-testing
RoiGlinik Oct 26, 2025
59006b1
change test to easy
RoiGlinik Oct 27, 2025
bd18cfd
make sure values are set on failure
RoiGlinik Oct 27, 2025
1e965c9
Merge branch 'master' into ROB-2290-workload-issues-testing
RoiGlinik Oct 28, 2025
30 changes: 22 additions & 8 deletions holmes/plugins/toolsets/robusta/robusta.py
@@ -202,7 +202,7 @@ def __init__(
        )
        self._dal = dal

-    def _fetch_change_history(
+    def _fetch_issues(
        self,
        params: Dict,
        cluster: Optional[str] = None,
@@ -225,7 +225,7 @@ def _fetch_change_history(

    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
        try:
-            changes = self._fetch_change_history(params)
+            changes = self._fetch_issues(params)
            if changes:
                return StructuredToolResult(
                    status=StructuredToolResultStatus.SUCCESS,
@@ -282,8 +282,8 @@ def __init__(self, dal: Optional[SupabaseDal]):
            add_cluster_filter=False,
        )

-    def _fetch_change_history(self, params: Dict) -> Optional[List[Dict]]:  # type: ignore
-        return super()._fetch_change_history(params, cluster="external")
+    def _fetch_issues(self, params: Dict) -> Optional[List[Dict]]:  # type: ignore
+        return super()._fetch_issues(params, cluster="external")

    def get_parameterized_one_liner(self, params: Dict) -> str:
        return f"Robusta: Search External Change History {params}"
@@ -296,14 +296,28 @@ def __init__(self, dal: Optional[SupabaseDal]):
            name="fetch_resource_issues_metadata",
            description=(
                "Fetch issues and alert metadata in a given time range. "
-                "Must be filtered on a given namespace and specific kubernetes resource such as pod, deployment, job, etc."
+                "Must be filtered on a given namespace and specific kubernetes resource, such as pod, deployment, job, etc. "
+                "Use fetch_finding_by_id to get further information on a specific issue or alert."
            ),
-            add_cluster_filter=True,
+            add_cluster_filter=False,
        )
+        self.parameters.update(
+            {
+                "namespace": ToolParameter(
+                    description="The Kubernetes namespace name for filtering issues and alerts",
+                    type="string",
+                    required=True,
+                ),
+                "workload": ToolParameter(
+                    description="Kubernetes resource name to filter issues and alerts (e.g., Pod, Deployment, Job, etc.). Must be the full name. For Pods, include the exact generated suffix.",
+                    type="string",
+                    required=True,
+                ),
+            }
+        )

-    def _fetch_resource_issues(self, params: Dict) -> Optional[List[Dict]]:  # type: ignore
-        return super()._fetch_change_history(params, finding_type=FindingType.ISSUE)
+    def _fetch_issues(self, params: Dict) -> Optional[List[Dict]]:  # type: ignore
+        return super()._fetch_issues(params, finding_type=FindingType.ISSUE)

    def get_parameterized_one_liner(self, params: Dict) -> str:
        return f"Robusta: fetch resource issues metadata {params}"
97 changes: 97 additions & 0 deletions issues_metadata.json
@@ -0,0 +1,97 @@
[
{
"id": "91160555-4aaf-4200-9fcf-3ffb93b93c89",
"description": "Back-off restarting failed container memory-eater in pod analytics-exporter-fast-76897854c-xxljr_default(dec2ce4b-a210-485d-8e6c-42b2ca28ecde)",
"source": "kubernetes_api_server",
"category": null,
"priority": "DEBUG",
"account_id": "16ecba1a-7993-4dd1-a98c-d201462ccba7",
"subject_type": "pod",
"subject_name": "analytics-exporter-fast-76897854c-xxljr",
"service_key": "default/Deployment/analytics-exporter-fast",
"subject_namespace": "default",
"cluster": "test",
"creation_date": "2025-10-19 10:59:27.643325",
"title": "BackOff Warning for Pod default/analytics-exporter-fast-76897854c-xxljr",
"aggregation_key": "PodLifecycleWarning",
"finding_type": "issue",
"failure": true,
"group_id": null,
"subject_node": null,
"starts_at": "2025-10-19 10:59:27.393256+00",
"ends_at": null,
"updated_at": "2025-10-19 10:59:27.577263+00",
"fingerprint": "adafae9087aaaab1db55131d4815737d4978b99956b71667dccb37bb23f8c1a9",
"video_links": [],
"service_kind": null,
"service_name": null,
"labels": {},
"annotations": {}
},
{
"id": "7a63a5cf-34e3-4802-a32f-c2a3de14d44a",
"description": null,
"source": "kubernetes_api_server",
"category": null,
"priority": "HIGH",
"account_id": "16ecba1a-7993-4dd1-a98c-d201462ccba7",
"subject_type": "pod",
"subject_name": "analytics-exporter-fast-76897854c-xxljr",
"service_key": "default/Deployment/analytics-exporter-fast",
"subject_namespace": "default",
"cluster": "test",
"creation_date": "2025-10-19 10:59:56.536309",
"title": "Crashing pod analytics-exporter-fast-76897854c-xxljr in namespace default",
"aggregation_key": "CrashLoopBackoff",
"finding_type": "issue",
"failure": true,
"group_id": null,
"subject_node": "aks-agentpool-35525070-vmss000001",
"starts_at": "2025-10-19 10:59:56.0862+00",
"ends_at": null,
"updated_at": "2025-10-19 10:59:56.459443+00",
"fingerprint": "71ea894a65ea183323d859d3c91951a3b0dba81a7ecda3a9cef605f23a81514c",
"video_links": [],
"service_kind": null,
"service_name": null,
"labels": {
"app": "analytics-exporter-fast",
"pod-template-hash": "76897854c"
},
"annotations": {}
},
{
"id": "b517520c-9fa9-491c-9754-821dc5ed1f75",
"description": null,
"source": "kubernetes_api_server",
"category": null,
"priority": "HIGH",
"account_id": "16ecba1a-7993-4dd1-a98c-d201462ccba7",
"subject_type": "pod",
"subject_name": "analytics-exporter-fast-76897854c-xxljr",
"service_key": "default/Deployment/analytics-exporter",
"subject_namespace": "default",
"cluster": "test",
"creation_date": "2025-10-19 10:59:56.536309",
"title": "Pod analytics-exporter-fast-76897854c-xxljr in namespace default OOMKilled results",
"aggregation_key": "PodOOMKilled",
"finding_type": "issue",
"failure": true,
"group_id": null,
"subject_node": "ip-10-0-237-138.us-east-2.compute.internal",
"starts_at": "2025-10-19 10:59:56.536309+00",
"ends_at": null,
"updated_at": "2025-10-19 10:59:56.536309+00",
"fingerprint": "3e03802bbc0a878f40772250cf5bb9926b504b0e9429b9ce3373922d9d067d72",
"video_links": [],
"service_kind": null,
"service_name": null,
"labels": {
"app": "analytics-exporter",
"pod-template-hash": "7fb8857595"
},
"annotations": {
"robusta.kubernetes.io/restartedAt": "2025-10-19T10:59:56.565094+00:00"
}
}
]
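
The fixture mixes naive timestamps (creation_date) with "+00"-offset ones (starts_at, updated_at). A minimal normalization sketch, assuming Python 3.11+ for fromisoformat's relaxed offset parsing:

from datetime import datetime, timezone

creation_date = "2025-10-19 10:59:27.643325"   # naive, no offset
starts_at = "2025-10-19 10:59:27.393256+00"    # "+00" UTC offset

def to_utc(value: str) -> datetime:
    dt = datetime.fromisoformat(value.replace("Z", "+00:00"))
    if dt.tzinfo is None:
        # Naive values are treated as UTC here; note that the mock DAL below
        # uses astimezone(), which treats naive values as local time instead.
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)

print(to_utc(creation_date), to_utc(starts_at))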
@@ -0,0 +1,17 @@
description: |
  A test case that simulates an issue a real customer ran into.
  When Holmes was asked about a pod that had died some time ago and could not find any information about it, it made up an answer about its condition.
  This test checks the new fetch_resource_issues_metadata tool, which lets Holmes retrieve historical issues from our timeline and find in history the reason the pod was killed.

user_prompt: "why did my pod analytics-exporter-fast-76897854c-xxljr died around 2025-10-19?"
expected_output:
  - The result mentions the analytics-exporter-fast pod was OOMKilled
before_test:
after_test:


test_type: "server"

tags:
  - easy
  - kubernetes
7 changes: 7 additions & 0 deletions toolsets.yaml
@@ -0,0 +1,7 @@
toolsets:
  kubernetes/core:
    enabled: true
  robusta:
    enabled: true
  kubernetes/logs:
    enabled: true
88 changes: 85 additions & 3 deletions tests/llm/utils/mock_dal.py
@@ -2,26 +2,34 @@
import json
import logging
from pathlib import Path
-from typing import Dict, Optional
+from typing import Dict, Optional, List

from pydantic import TypeAdapter

-from holmes.core.supabase_dal import SupabaseDal
+from holmes.core.supabase_dal import SupabaseDal, FindingType
from holmes.core.tool_calling_llm import Instructions, ResourceInstructions
from tests.llm.utils.test_case_utils import read_file
+from datetime import datetime, timezone


class MockSupabaseDal(SupabaseDal):
    def __init__(
        self,
        test_case_folder: Path,
        issue_data: Optional[Dict],
+        issues_metadata: Optional[List[Dict]],
        resource_instructions: Optional[ResourceInstructions],
        generate_mocks: bool,
    ):
-        super().__init__(cluster="test")
+        try:
+            super().__init__(cluster="test")
+        except Exception:
+            logging.warning(
+                "MockSupabaseDal could not connect to the DB. Running in pure mock mode; real DB calls and --generate-mocks will fail."
+            )
        self._issue_data = issue_data
        self._resource_instructions = resource_instructions
+        self._issues_metadata = issues_metadata
        self._test_case_folder = test_case_folder
        self._generate_mocks = generate_mocks

@@ -72,6 +80,74 @@ def get_global_instructions_for_account(self) -> Optional[Instructions]:
    def get_workload_issues(self, *args) -> list:
        return []

+    def get_issues_metadata(
+        self,
+        start_datetime: str,
+        end_datetime: str,
+        limit: int = 100,
+        workload: Optional[str] = None,
+        ns: Optional[str] = None,
+        cluster: Optional[str] = None,
+        finding_type: FindingType = FindingType.CONFIGURATION_CHANGE,
+    ) -> Optional[List[Dict]]:
+        if self._issues_metadata is not None:
+            filtered_data = []
+            if not cluster:
+                cluster = self.cluster
+            for item in self._issues_metadata:
+                creation_date, start, end = [
+                    datetime.fromisoformat(dt.replace("Z", "+00:00")).astimezone(
+                        timezone.utc
+                    )
+                    for dt in (item["creation_date"], start_datetime, end_datetime)
+                ]
+                if not (start <= creation_date <= end):
+                    continue
+                if item.get("finding_type") != finding_type.value:
+                    continue
+                if item.get("cluster") != cluster:
+                    continue
+                if workload:
+                    if item.get("subject_name") != workload:
+                        continue
+                if ns:
+                    if item.get("subject_namespace") != ns:
+                        continue
+
+                filtered_item = {
+                    "id": item.get("id"),
+                    "title": item.get("title"),
+                    "subject_name": item.get("subject_name"),
+                    "subject_namespace": item.get("subject_namespace"),
+                    "subject_type": item.get("subject_type"),
+                    "description": item.get("description"),
+                    "starts_at": item.get("starts_at"),
+                    "ends_at": item.get("ends_at"),
+                }
+                filtered_data.append(filtered_item)
+            filtered_data = filtered_data[:limit]
+
+            return filtered_data if filtered_data else None
+        else:
+            data = super().get_issues_metadata(
+                start_datetime, end_datetime, limit, workload, ns, cluster, finding_type
+            )
+            if self._generate_mocks:
+                file_path = self._get_mock_file_path("issues_metadata")
+
+                with open(file_path, "w") as f:
+                    json.dump(data or {}, f, indent=2)
+
+                logging.warning(
+                    f"A mock file was generated for you at {file_path} "
+                    f"with the content of dal.get_issues_metadata("
+                    f"{start_datetime}, {end_datetime}, {limit}, "
+                    f"{workload}, {ns}, {finding_type})"
+                )
+
+            return data


pydantic_resource_instructions = TypeAdapter(ResourceInstructions)

@@ -82,6 +158,11 @@ def load_mock_dal(test_case_folder: Path, generate_mocks: bool):
    if issue_data_mock_path.exists():
        issue_data = json.loads(read_file(issue_data_mock_path))

+    issues_metadata_path = test_case_folder.joinpath(Path("issues_metadata.json"))
+    issues_metadata = None
+    if issues_metadata_path.exists():
+        issues_metadata = json.loads(read_file(issues_metadata_path))
+
    resource_instructions_mock_path = test_case_folder.joinpath(
        Path("resource_instructions.json")
    )
@@ -95,5 +176,6 @@
        test_case_folder=test_case_folder,
        issue_data=issue_data,
        resource_instructions=resource_instructions,
+        issues_metadata=issues_metadata,
        generate_mocks=generate_mocks,
    )
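
A minimal usage sketch of the new mock path, assuming it runs from the repo root so the tests package is importable; the fixture folder path is hypothetical. Note that finding_type defaults to CONFIGURATION_CHANGE, so FindingType.ISSUE has to be passed explicitly to match the fixture entries above.

from pathlib import Path

from holmes.core.supabase_dal import FindingType
from tests.llm.utils.mock_dal import load_mock_dal

# Hypothetical fixture folder containing the issues_metadata.json shown above.
mock_dal = load_mock_dal(
    test_case_folder=Path("tests/llm/fixtures/some_test_case"),
    generate_mocks=False,
)
issues = mock_dal.get_issues_metadata(
    start_datetime="2025-10-19T00:00:00+00:00",
    end_datetime="2025-10-20T00:00:00+00:00",
    workload="analytics-exporter-fast-76897854c-xxljr",
    ns="default",
    cluster="test",  # matches the fixture's "cluster" field
    finding_type=FindingType.ISSUE,  # default is CONFIGURATION_CHANGE
)
# Returns the trimmed metadata dicts (id, title, subject_* fields, description,
# starts_at, ends_at) for matching entries, or None when nothing matches.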
9 changes: 8 additions & 1 deletion tests/llm/utils/mock_toolset.py
@@ -10,6 +10,8 @@
import threading
from pydantic import BaseModel
import pytest
+from tests.llm.utils.mock_dal import load_mock_dal
+from pathlib import Path

from holmes.core.tools import (
    StructuredToolResult,
@@ -643,8 +645,13 @@ def _get_toolset_mode(self, toolset_name: str) -> MockMode:

    def _initialize_toolsets(self):
        """Initialize and configure toolsets."""
+
+        mock_dal = load_mock_dal(
+            test_case_folder=Path(self.test_case_folder),
+            generate_mocks=self.mock_generation_config.generate_mocks,
+        )
        # Load builtin toolsets
-        builtin_toolsets = load_builtin_toolsets()
+        builtin_toolsets = load_builtin_toolsets(mock_dal)

        # Load custom toolsets from YAML if present
        config_path = os.path.join(self.test_case_folder, "toolsets.yaml")