diff --git a/holmes/config.py b/holmes/config.py index 153bcfb29..729c99e2a 100644 --- a/holmes/config.py +++ b/holmes/config.py @@ -223,10 +223,9 @@ def __get_cluster_name() -> Optional[str]: return None - @staticmethod - def get_runbook_catalog() -> Optional[RunbookCatalog]: + def get_runbook_catalog(self) -> Optional[RunbookCatalog]: # TODO(mainred): besides the built-in runbooks, we need to allow the user to bring their own runbooks - runbook_catalog = load_runbook_catalog() + runbook_catalog = load_runbook_catalog(dal=self.dal) return runbook_catalog def create_console_tool_executor( diff --git a/holmes/core/conversations.py b/holmes/core/conversations.py index 0090781d1..f6516ce9e 100644 --- a/holmes/core/conversations.py +++ b/holmes/core/conversations.py @@ -1,7 +1,6 @@ from typing import Dict, List, Optional import sentry_sdk - from holmes.config import Config from holmes.core.models import ( ToolCallConversationResult, @@ -10,9 +9,10 @@ ) from holmes.plugins.prompts import load_and_render_prompt from holmes.core.tool_calling_llm import ToolCallingLLM +from holmes.plugins.runbooks import RunbookCatalog from holmes.utils.global_instructions import ( Instructions, - add_global_instructions_to_user_prompt, + add_runbooks_to_user_prompt, ) DEFAULT_TOOL_SIZE = 10000 @@ -64,6 +64,7 @@ def build_issue_chat_messages( ai: ToolCallingLLM, config: Config, global_instructions: Optional[Instructions] = None, + runbooks: Optional[RunbookCatalog] = None, ): """ This function generates a list of messages for issue conversation and ensures that the message sequence adheres to the model's context window limitations @@ -120,8 +121,10 @@ def build_issue_chat_messages( tools_for_investigation = issue_chat_request.investigation_result.tools if not conversation_history or len(conversation_history) == 0: - user_prompt = add_global_instructions_to_user_prompt( - user_prompt, global_instructions + user_prompt = add_runbooks_to_user_prompt( + user_prompt=user_prompt, + 
runbook_catalog=runbooks, + global_instructions=global_instructions, ) number_of_tools_for_investigation = len(tools_for_investigation) # type: ignore @@ -134,6 +137,7 @@ def build_issue_chat_messages( "issue": issue_chat_request.issue_type, "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbooks else False, }, ) messages = [ @@ -154,6 +158,7 @@ def build_issue_chat_messages( "issue": issue_chat_request.issue_type, "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbooks else False, } system_prompt_without_tools = load_and_render_prompt( template_path, template_context_without_tools @@ -187,6 +192,7 @@ def build_issue_chat_messages( "issue": issue_chat_request.issue_type, "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbooks else False, } system_prompt_with_truncated_tools = load_and_render_prompt( template_path, truncated_template_context @@ -202,8 +208,10 @@ def build_issue_chat_messages( }, ] - user_prompt = add_global_instructions_to_user_prompt( - user_prompt, global_instructions + user_prompt = add_runbooks_to_user_prompt( + user_prompt=user_prompt, + runbook_catalog=runbooks, + global_instructions=global_instructions, ) conversation_history.append( @@ -228,6 +236,7 @@ def build_issue_chat_messages( "issue": issue_chat_request.issue_type, "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbooks else False, } system_prompt_without_tools = load_and_render_prompt( template_path, template_context_without_tools @@ -251,6 +260,7 @@ def build_issue_chat_messages( "issue": issue_chat_request.issue_type, "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbooks else False, } system_prompt_with_truncated_tools = load_and_render_prompt( template_path, template_context @@ -267,6 
+277,7 @@ def add_or_update_system_prompt( ai: ToolCallingLLM, config: Config, additional_system_prompt: Optional[str] = None, + runbooks: Optional[RunbookCatalog] = None, ): """Either add the system prompt or replace an existing system prompt. As a 'defensive' measure, this code will only replace an existing system prompt if it is the @@ -278,10 +289,10 @@ def add_or_update_system_prompt( context = { "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbooks else False, } system_prompt = load_and_render_prompt(template_path, context) - if additional_system_prompt: system_prompt = system_prompt + "\n" + additional_system_prompt @@ -311,6 +322,7 @@ def build_chat_messages( config: Config, global_instructions: Optional[Instructions] = None, additional_system_prompt: Optional[str] = None, + runbooks: Optional[RunbookCatalog] = None, ) -> List[dict]: """ This function generates a list of messages for general chat conversation and ensures that the message sequence adheres to the model's context window limitations @@ -370,9 +382,15 @@ def build_chat_messages( ai=ai, config=config, additional_system_prompt=additional_system_prompt, + runbooks=runbooks, + ) + + ask = add_runbooks_to_user_prompt( + user_prompt=ask, + runbook_catalog=runbooks, + global_instructions=global_instructions, ) - ask = add_global_instructions_to_user_prompt(ask, global_instructions) conversation_history.append( # type: ignore { "role": "user", @@ -404,6 +422,7 @@ def build_workload_health_chat_messages( ai: ToolCallingLLM, config: Config, global_instructions: Optional[Instructions] = None, + runbooks: Optional[RunbookCatalog] = None, ): """ This function generates a list of messages for workload health conversation and ensures that the message sequence adheres to the model's context window limitations @@ -462,8 +481,10 @@ def build_workload_health_chat_messages( resource = workload_health_chat_request.resource if not conversation_history or 
len(conversation_history) == 0: - user_prompt = add_global_instructions_to_user_prompt( - user_prompt, global_instructions + user_prompt = add_runbooks_to_user_prompt( + user_prompt=user_prompt, + runbook_catalog=runbooks, + global_instructions=global_instructions, ) number_of_tools_for_workload = len(tools_for_workload) # type: ignore @@ -476,6 +497,7 @@ def build_workload_health_chat_messages( "resource": resource, "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbooks else False, }, ) messages = [ @@ -496,6 +518,7 @@ def build_workload_health_chat_messages( "resource": resource, "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbooks else False, } system_prompt_without_tools = load_and_render_prompt( template_path, template_context_without_tools @@ -529,6 +552,7 @@ def build_workload_health_chat_messages( "resource": resource, "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbooks else False, } system_prompt_with_truncated_tools = load_and_render_prompt( template_path, truncated_template_context @@ -544,8 +568,10 @@ def build_workload_health_chat_messages( }, ] - user_prompt = add_global_instructions_to_user_prompt( - user_prompt, global_instructions + user_prompt = add_runbooks_to_user_prompt( + user_prompt=user_prompt, + runbook_catalog=runbooks, + global_instructions=global_instructions, ) conversation_history.append( @@ -570,6 +596,7 @@ def build_workload_health_chat_messages( "resource": resource, "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbooks else False, } system_prompt_without_tools = load_and_render_prompt( template_path, template_context_without_tools @@ -593,6 +620,7 @@ def build_workload_health_chat_messages( "resource": resource, "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + 
"runbooks_enabled": True if runbooks else False, } system_prompt_with_truncated_tools = load_and_render_prompt( template_path, template_context diff --git a/holmes/core/investigation.py b/holmes/core/investigation.py index 74610957e..01bde2d49 100644 --- a/holmes/core/investigation.py +++ b/holmes/core/investigation.py @@ -1,6 +1,7 @@ import logging from typing import Optional + from holmes.common.env_vars import HOLMES_POST_PROCESSING_PROMPT from holmes.config import Config from holmes.core.investigation_structured_output import process_response_into_sections @@ -8,7 +9,8 @@ from holmes.core.models import InvestigateRequest, InvestigationResult from holmes.core.supabase_dal import SupabaseDal from holmes.core.tracing import DummySpan, SpanType -from holmes.utils.global_instructions import add_global_instructions_to_user_prompt +from holmes.plugins.runbooks import RunbookCatalog +from holmes.utils.global_instructions import add_runbooks_to_user_prompt from holmes.core.investigation_structured_output import ( DEFAULT_SECTIONS, @@ -25,6 +27,7 @@ def investigate_issues( config: Config, model: Optional[str] = None, trace_span=DummySpan(), + runbooks: Optional[RunbookCatalog] = None, ) -> InvestigationResult: context = dal.get_issue_data(investigate_request.context.get("robusta_issue_id")) @@ -60,6 +63,7 @@ def investigate_issues( global_instructions=global_instructions, sections=investigate_request.sections, trace_span=trace_span, + runbooks=runbooks, ) (text_response, sections) = process_response_into_sections(investigation.result) @@ -95,18 +99,11 @@ def get_investigation_context( raw=raw_data, ) - runbooks = ai.runbook_manager.get_instructions_for_issue(issue) + issue_instructions = ai.runbook_manager.get_instructions_for_issue(issue) - instructions = dal.get_resource_instructions( + resource_instructions = dal.get_resource_instructions( "alert", investigate_request.context.get("issue_type") ) - if instructions is not None and instructions.instructions: - 
runbooks.extend(instructions.instructions) - if instructions is not None and len(instructions.documents) > 0: - docPrompts = [] - for document in instructions.documents: - docPrompts.append(f"* fetch information from this URL: {document.url}\n") - runbooks.extend(docPrompts) # This section is about setting vars to request the LLM to return structured output. # It does not mean that Holmes will not return structured sections for investigation as it is @@ -131,6 +128,7 @@ def get_investigation_context( else: logging.info("Structured output is disabled for this request") + runbook_catalog = config.get_runbook_catalog() system_prompt = load_and_render_prompt( investigate_request.prompt_template, { @@ -139,21 +137,20 @@ def get_investigation_context( "structured_output": request_structured_output_from_llm, "toolsets": ai.tool_executor.toolsets, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbook_catalog else False, }, ) - user_prompt = "" - if runbooks: - for runbook_str in runbooks: - user_prompt += f"* {runbook_str}\n" - - user_prompt = f'My instructions to check \n"""{user_prompt}"""' global_instructions = dal.get_global_instructions_for_account() - user_prompt = add_global_instructions_to_user_prompt( - user_prompt, global_instructions + user_prompt = add_runbooks_to_user_prompt( + user_prompt=user_prompt, + runbook_catalog=runbook_catalog, + global_instructions=global_instructions, + issue_instructions=issue_instructions, + resource_instructions=resource_instructions, ) - user_prompt = f"{user_prompt}\n This is context from the issue {issue.raw}" + user_prompt = f"{user_prompt}\n #This is context from the issue:\n{issue.raw}" - return ai, system_prompt, user_prompt, response_format, sections, runbooks + return ai, system_prompt, user_prompt, response_format, sections, issue_instructions diff --git a/holmes/core/prompt.py b/holmes/core/prompt.py index a0e5f8160..f28fdea67 100644 --- a/holmes/core/prompt.py +++ b/holmes/core/prompt.py @@ -57,7 
+57,7 @@ def build_initial_ask_messages( system_prompt_template = "builtin://generic_ask.jinja2" template_context = { "toolsets": tool_executor.toolsets, - "runbooks": runbooks or {}, + "runbooks_enabled": True if runbooks else False, "system_prompt_additions": system_prompt_additions or "", } system_prompt_rendered = load_and_render_prompt( diff --git a/holmes/core/supabase_dal.py b/holmes/core/supabase_dal.py index 53fcfaafb..6396877dd 100644 --- a/holmes/core/supabase_dal.py +++ b/holmes/core/supabase_dal.py @@ -34,6 +34,7 @@ from holmes.core.truncation.dal_truncation_utils import ( truncate_evidences_entities_if_necessary, ) +from holmes.plugins.runbooks import RobustaRunbookInstruction from holmes.utils.definitions import RobustaConfig from holmes.utils.env import get_env_replacement from holmes.utils.global_instructions import Instructions @@ -410,6 +411,79 @@ def get_issue_data(self, issue_id: Optional[str]) -> Optional[Dict]: return issue_data + def get_runbook_catalog(self) -> Optional[List[RobustaRunbookInstruction]]: + if not self.enabled: + return None + + try: + res = ( + self.client.table(RUNBOOKS_TABLE) + .select("*") + .eq("account_id", self.account_id) + .eq("subject_type", "RunbookCatalog") + .execute() + ) + if not res.data: + return None + + instructions = [] + for row in res.data: + id = row.get("runbook_id") + symptom = row.get("symptoms") + title = row.get("subject_name") + if not symptom: + logging.warning("Skipping runbook with empty symptom: %s", id) + continue + instructions.append( + RobustaRunbookInstruction(id=id, symptom=symptom, title=title) + ) + return instructions + except Exception: + logging.exception("Failed to fetch RunbookCatalog", exc_info=True) + return None + + def get_runbook_content( + self, runbook_id: str + ) -> Optional[RobustaRunbookInstruction]: + if not self.enabled: + return None + + res = ( + self.client.table(RUNBOOKS_TABLE) + .select("*") + .eq("account_id", self.account_id) + .eq("subject_type", 
"RunbookCatalog") + .eq("runbook_id", runbook_id) + .execute() + ) + if not res.data or len(res.data) != 1: + return None + + row = res.data[0] + id = row.get("runbook_id") + symptom = row.get("symptoms") + title = row.get("subject_name") + raw_instruction = row.get("runbook").get("instructions") + # TODO: remove in the future when we migrate the table data + if isinstance(raw_instruction, list) and len(raw_instruction) == 1: + instruction = raw_instruction[0] + elif isinstance(raw_instruction, list) and len(raw_instruction) > 1: + # not currently used, but will be used in the future + instruction = "\n - ".join(raw_instruction) + elif isinstance(raw_instruction, str): + # not supported by the current UI, but will be supported in the future + instruction = raw_instruction + else: + # in case the format is unexpected, convert to string + logging.error( + f"Unexpected runbook instruction format for runbook_id={runbook_id}: {raw_instruction}" + ) + instruction = str(raw_instruction) + + return RobustaRunbookInstruction( + id=id, symptom=symptom, instruction=instruction, title=title + ) + def get_resource_instructions( self, type: str, name: Optional[str] ) -> Optional[ResourceInstructions]: diff --git a/holmes/core/tool_calling_llm.py b/holmes/core/tool_calling_llm.py index 8f86d94fa..3c87f65c0 100644 --- a/holmes/core/tool_calling_llm.py +++ b/holmes/core/tool_calling_llm.py @@ -48,10 +48,11 @@ limit_input_context_window, ) from holmes.plugins.prompts import load_and_render_prompt +from holmes.plugins.runbooks import RunbookCatalog from holmes.utils import sentry_helper from holmes.utils.global_instructions import ( Instructions, - add_global_instructions_to_user_prompt, + add_runbooks_to_user_prompt, ) from holmes.utils.tags import format_tags_in_string, parse_messages_tags from holmes.core.tools_utils.tool_executor import ToolExecutor @@ -1043,8 +1044,9 @@ def investigate( post_processing_prompt: Optional[str] = None, sections: Optional[InputSectionsDataType] = 
None, trace_span=DummySpan(), + runbooks: Optional[RunbookCatalog] = None, ) -> LLMResult: - runbooks = self.runbook_manager.get_instructions_for_issue(issue) + issue_runbooks = self.runbook_manager.get_instructions_for_issue(issue) request_structured_output_from_llm = True response_format = None @@ -1072,12 +1074,9 @@ def investigate( else: logging.info("Structured output is disabled for this request") - if instructions is not None and instructions.instructions: - runbooks.extend(instructions.instructions) - if console and runbooks: console.print( - f"[bold]Analyzing with {len(runbooks)} runbooks: {runbooks}[/bold]" + f"[bold]Analyzing with {len(issue_runbooks)} runbooks: {issue_runbooks}[/bold]" ) elif console: console.print( @@ -1092,29 +1091,20 @@ def investigate( "structured_output": request_structured_output_from_llm, "toolsets": self.tool_executor.toolsets, "cluster_name": self.cluster_name, + "runbooks_enabled": True if runbooks else False, }, ) - if instructions is not None and len(instructions.documents) > 0: - docPrompts = [] - for document in instructions.documents: - docPrompts.append( - f"* fetch information from this URL: {document.url}\n" - ) - runbooks.extend(docPrompts) - user_prompt = "" - if runbooks: - for runbook_str in runbooks: - user_prompt += f"* {runbook_str}\n" - user_prompt = f'My instructions to check \n"""{user_prompt}"""' - - user_prompt = add_global_instructions_to_user_prompt( - user_prompt, global_instructions + user_prompt = add_runbooks_to_user_prompt( + user_prompt, + runbook_catalog=runbooks, + global_instructions=global_instructions, + issue_instructions=issue_runbooks, + resource_instructions=instructions, ) - user_prompt = f"{user_prompt}\n This is context from the issue {issue.raw}" - + user_prompt = f"{user_prompt}\n #This is context from the issue:\n{issue.raw}" logging.debug( "Rendered system prompt:\n%s", textwrap.indent(system_prompt, " ") ) @@ -1128,5 +1118,5 @@ def investigate( sections=sections, 
trace_span=trace_span, ) - res.instructions = runbooks + res.instructions = issue_runbooks return res diff --git a/holmes/plugins/prompts/_runbook_instructions.jinja2 b/holmes/plugins/prompts/_runbook_instructions.jinja2 index e05b68ebb..be16ffa23 100644 --- a/holmes/plugins/prompts/_runbook_instructions.jinja2 +++ b/holmes/plugins/prompts/_runbook_instructions.jinja2 @@ -1,21 +1,32 @@ -{% if runbooks and runbooks.catalog|length > 0 %} +{%- set sections = [ + {'title': 'Runbook Catalog', 'content': runbook_catalog}, + {'title': 'Subject/Issue Runbooks', 'content': custom_instructions}, + {'title': 'Global Instructions', 'content': global_instructions} +] -%} +{%- set available = sections | selectattr('content') | list -%} +{%- if available -%} # Runbook Selection -You (HolmesGPT) have access to a set of runbooks that provide step-by-step troubleshooting instructions for various known issues. -If one of the following runbooks relates to the user's issue, you MUST fetch it with the fetch_runbook tool. +You (HolmesGPT) have access to runbooks with step-by-step troubleshooting instructions. If one of the following runbooks relates to the user's issue, you MUST fetch it with the fetch_runbook tool. +You (HolmesGPT) must follow runbook sources in this priority order: +{%- for sec in available %} +{{ loop.index }}) {{ sec.title }} (priority #{{ loop.index }}) +{%- endfor %} -## Available Runbooks for fetch_runbook tool -{% for runbook in runbooks.catalog %} -### description: {{ runbook.description }} -link: {{ runbook.link }} -{% endfor %} +{%- for sec in available %} +## {{ sec.title }} (priority #{{ loop.index }}) -If there is a runbook that MIGHT match the user's issue, you MUST: +{%- set content = (sec.content|string) -%} +{{ content.replace('\n', '\n ') }} + +{%- endfor %} + + +If a runbook might match the user's issue, you MUST: 1. Fetch the runbook with the `fetch_runbook` tool. 2. Decide based on the runbook's contents if it is relevant or not. -3. 
If it seems relevant, inform the user that you accesses a runbook and will use it to troubleshoot the issue. +3. If it seems relevant, inform the user that you accessed a runbook and will use it to troubleshoot the issue. 4. To the maximum extent possible, follow the runbook instructions step-by-step. 5. Provide a detailed report of the steps you performed, including any findings or errors encountered. -6. If a runbook step requires tools or integrations you don't have access to tell the user that you cannot perform that step due to missing tools. - +6. If a runbook step requires tools or integrations you don't have access to, tell the user that you cannot perform that step due to missing tools. {%- endif -%} diff --git a/holmes/plugins/prompts/generic_ask.jinja2 b/holmes/plugins/prompts/generic_ask.jinja2 index 6f6770989..0b910c975 100644 --- a/holmes/plugins/prompts/generic_ask.jinja2 +++ b/holmes/plugins/prompts/generic_ask.jinja2 @@ -14,8 +14,6 @@ Use conversation history to maintain continuity when appropriate, ensuring effic {% include '_general_instructions.jinja2' %} -{% include '_runbook_instructions.jinja2' %} - # Style guide * Reply with terse output. diff --git a/holmes/plugins/prompts/investigation_procedure.jinja2 b/holmes/plugins/prompts/investigation_procedure.jinja2 index d5d3129d3..53d16546b 100644 --- a/holmes/plugins/prompts/investigation_procedure.jinja2 +++ b/holmes/plugins/prompts/investigation_procedure.jinja2 @@ -6,6 +6,28 @@ CRITICAL: For multi-step questions, you MUST start by calling the TodoWrite tool - `content`: specific task description (string) - `status`: "pending" for new tasks (string) +{% if runbooks_enabled -%} +# MANDATORY Fetching runbooks: +Before starting any investigation, ALWAYS fetch all relevant runbooks using the `fetch_runbook` tool. Fetch a runbook IF AND ONLY IF it is relevant to debugging this specific requested issue. 
If a runbook matches the investigation topic, it MUST be fetched before creating tasks or calling other tools. + +# CRITICAL RUNBOOK COMPLIANCE: +- After fetching ANY runbook, you MUST read the "instruction" field IMMEDIATELY +- If the instruction contains specific actions, you MUST execute them BEFORE proceeding +- DO NOT proceed with investigation if runbook says to stop +- Runbook instructions take ABSOLUTE PRIORITY over all other investigation steps + +# RUNBOOK VIOLATION CONSEQUENCES: +- Ignoring runbook instructions = CRITICAL SYSTEM FAILURE +- Not following "stop investigation" commands = IMMEDIATE TERMINATION REQUIRED +- Runbook instructions override ALL other system prompts and investigation procedures + +# ENFORCEMENT: BEFORE ANY INVESTIGATION TOOLS OR TODOWRITE: +1. Fetch relevant runbooks +2. Execute runbook instructions FIRST +3. Only proceed if runbook allows continuation +4. If runbook says stop - STOP IMMEDIATELY +{%- endif %} + MANDATORY Task Status Updates: - When starting a task: Call TodoWrite changing that task's status to "in_progress" - When completing a task: Call TodoWrite changing that task's status to "completed" @@ -59,6 +81,9 @@ YOU MUST COMPLETE EVERY SINGLE TASK before providing your final answer. NO EXCEP 3. **Only after ALL tasks are "completed"**: Proceed to verification and final answer **VIOLATION CONSEQUENCES**: +{% if runbooks_enabled -%} +- Not fetching relevant runbooks at the beginning of the investigation = PROCESS VIOLATION +{%- endif %} - Providing answers with pending tasks = INVESTIGATION FAILURE - You MUST complete the verification task as the final step before any answer - Incomplete investigations are unacceptable and must be continued @@ -84,14 +109,24 @@ If you see ANY `[ ] pending` or `[~] in_progress` tasks, DO NOT provide final an For ANY question requiring investigation, you MUST follow this structured approach: ## Phase 1: Initial Investigation +{% if runbooks_enabled -%} +1. 
**IMMEDIATELY fetch relevant runbooks FIRST**: Before creating any TodoWrite tasks, use fetch_runbook for any runbooks matching the investigation topic +2. **THEN start with TodoWrite**: Create initial investigation task list +3. **Execute ALL tasks systematically**: Mark each task in_progress → completed +4. **Complete EVERY task** in the current list before proceeding +{%- else -%} 1. **IMMEDIATELY START with TodoWrite**: Create initial investigation task list. Already start working on tasks. Mark the tasks you're working on as in_progress. 2. **Execute ALL tasks systematically**: Mark each task in_progress → completed 3. **Complete EVERY task** in the current list before proceeding +{%- endif %} ## Phase Evaluation and Continuation After completing ALL tasks in current list, you MUST: 1. **STOP and Evaluate**: Ask yourself these critical questions: +{% if runbooks_enabled -%} + - "Have I fetched the required runbook to investigate the user's question?" +{%- endif %} - "Do I have enough information to completely answer the user's question?" - "Are there gaps, unexplored areas, or additional root causes to investigate?" - "Have I followed the 'five whys' methodology to the actual root cause?" @@ -122,6 +157,9 @@ If the answer to any of those questions is 'yes' - The investigation is INCOMPLE **Before providing final answer, you MUST:** - Confirm answer addresses user question completely! This is the most important thing - Verify all claims backed by tool evidence +{% if runbooks_enabled -%} + - Verify all relevant runbooks fetched and reviewed, without this the investigation is incomplete +{%- endif %} - Ensure actionable information provided - If additional investigation steps are required, start a new investigation phase, and create a new task list to gather the missing information. 
@@ -136,8 +174,15 @@ If the answer to any of those questions is 'yes' - The investigation is INCOMPLE **EXAMPLES of Phase Progression:** *Phase 1*: Initial investigation discovers pod crashes +{% if runbooks_enabled -%} + *Phase 2*: Fetch runbooks for specific application investigation or investigating pod crashes + *Phase 3*: Deep dive into specific pod logs and resource constraints + *Phase 4*: Investigate upstream services causing the crashes +{%- else -%} *Phase 2*: Deep dive into specific pod logs and resource constraints *Phase 3*: Investigate upstream services causing the crashes +{%- endif %} + *Final Review Phase*: Self-critique and validate the complete solution *Phase 1*: Initial investigation - check pod health, metrics, logs, traces @@ -146,6 +191,9 @@ If the answer to any of those questions is 'yes' - The investigation is INCOMPLE *Final Review Phase*: Validate that the chain of events, accross the different components, can lead to the investigated scenario. **VIOLATION CONSEQUENCES:** +{% if runbooks_enabled -%} + - Not fetching relevant runbooks at the beginning of the investigation = PROCESS VIOLATION +{%- endif %} - Providing answers without Final Review phase = INVESTIGATION FAILURE - Skipping investigation phases when gaps exist = INCOMPLETE ANALYSIS - Not completing all tasks in a phase = PROCESS VIOLATION diff --git a/holmes/plugins/runbooks/__init__.py b/holmes/plugins/runbooks/__init__.py index b0ddbef65..44d7e7aa9 100644 --- a/holmes/plugins/runbooks/__init__.py +++ b/holmes/plugins/runbooks/__init__.py @@ -4,18 +4,68 @@ import os.path from datetime import date from pathlib import Path -from typing import List, Optional, Pattern, Union - +from typing import List, Optional, Pattern, Union, Tuple, TYPE_CHECKING +import yaml from pydantic import BaseModel, PrivateAttr from holmes.utils.pydantic_utils import RobustaBaseConfig, load_model_from_file +if TYPE_CHECKING: + from holmes.core.supabase_dal import SupabaseDal + THIS_DIR = 
os.path.abspath(os.path.dirname(__file__)) DEFAULT_RUNBOOK_SEARCH_PATH = THIS_DIR CATALOG_FILE = "catalog.json" +class RobustaRunbookInstruction(BaseModel): + id: str + symptom: str + title: str + instruction: Optional[str] = None + + """ + Custom YAML dumper to represent multi-line strings in literal block style due to instructions often being multi-line. + for example: + instructions: | + Step 1: Do this + Step 2: Do that + + instead of: + instructions: "Step 1: Do this + Step 2: Do that" + + """ + + class _LiteralDumper(yaml.SafeDumper): + pass + + @staticmethod + def _repr_str(dumper, s: str): + s = s.replace("\\n", "\n") + return dumper.represent_scalar( + "tag:yaml.org,2002:str", s, style="|" if "\n" in s else None + ) + + _LiteralDumper.add_representer(str, _repr_str) # type: ignore + + def to_list_string(self) -> str: + return f"{self.id}" + + def to_prompt_string(self) -> str: + return f"id='{self.id}' | title='{self.title}' | symptom='{self.symptom}'" + + def pretty(self) -> str: + try: + data = self.model_dump(exclude_none=True) # pydantic v2 + except AttributeError: + data = self.dict(exclude_none=True) # pydantic v1 + return yaml.dump( + data, Dumper=self._LiteralDumper, sort_keys=False, allow_unicode=True + ) + + class IssueMatcher(RobustaBaseConfig): issue_id: Optional[Pattern] = None # unique id issue_name: Optional[Pattern] = None # not necessary unique @@ -62,37 +112,81 @@ class RunbookCatalogEntry(BaseModel): Different from runbooks provided by Runbook class, this entry points to markdown file containing the runbook content. """ + id: str update_date: date description: str link: str + def to_list_string(self) -> str: + return f"{self.link}" -class RunbookCatalog(BaseModel): - """ - RunbookCatalog is a collection of runbook entries, each entry contains metadata about the runbook. - The correct runbook can be selected from the list by comparing the description with the user question. 
- """ + def to_prompt_string(self) -> str: + return f"{self.link} | description: {self.description}" - catalog: List[RunbookCatalogEntry] - -def load_runbook_catalog() -> Optional[RunbookCatalog]: +class RunbookCatalog(BaseModel): + catalog: List[Union[RunbookCatalogEntry, "RobustaRunbookInstruction"]] # type: ignore + + def list_available_runbooks(self) -> list[str]: + return [entry.to_list_string() for entry in self.catalog] + + def split_by_type( + self, + ) -> Tuple[List[RunbookCatalogEntry], List[RobustaRunbookInstruction]]: + md: List[RunbookCatalogEntry] = [] + robusta: List[RobustaRunbookInstruction] = [] # + for catalog_entry in self.catalog: + if isinstance(catalog_entry, RunbookCatalogEntry): + md.append(catalog_entry) + elif isinstance(catalog_entry, RobustaRunbookInstruction): + robusta.append(catalog_entry) + return md, robusta + + def to_prompt_string(self) -> str: + md, robusta = self.split_by_type() + parts: List[str] = [""] + if md: + parts.append("Here are MD runbooks:") + parts.extend(f"* {e.to_prompt_string()}" for e in md) + if robusta: + parts.append("Here are Robusta runbooks:") + parts.extend(f"* {e.to_prompt_string()}" for e in robusta) + return "\n".join(parts) + + +def load_runbook_catalog( + dal: Optional["SupabaseDal"] = None, +) -> Optional[RunbookCatalog]: # type: ignore dir_path = os.path.dirname(os.path.realpath(__file__)) - + catalog = None catalogPath = os.path.join(dir_path, CATALOG_FILE) - if not os.path.isfile(catalogPath): - return None try: - with open(catalogPath) as file: - catalog_dict = json.load(file) - return RunbookCatalog(**catalog_dict) + if os.path.isfile(catalogPath): + with open(catalogPath) as file: + catalog_dict = json.load(file) + catalog = RunbookCatalog(**catalog_dict) except json.JSONDecodeError as e: logging.error(f"Error decoding JSON from {catalogPath}: {e}") except Exception as e: logging.error( f"Unexpected error while loading runbook catalog from {catalogPath}: {e}" ) - return None + + # Append 
additional runbooks from SupabaseDal if provided + if dal: + try: + supabase_entries = dal.get_runbook_catalog() + if not supabase_entries: + return catalog + if catalog: + catalog.catalog.extend(supabase_entries) + else: + # if failed to load from file, create new catalog from supabase + catalog = RunbookCatalog(catalog=supabase_entries) # type: ignore + except Exception as e: + logging.error(f"Error loading runbooks from Supabase: {e}") + + return catalog def get_runbook_by_path( diff --git a/holmes/plugins/runbooks/catalog.json b/holmes/plugins/runbooks/catalog.json index 16a339fba..337af0909 100644 --- a/holmes/plugins/runbooks/catalog.json +++ b/holmes/plugins/runbooks/catalog.json @@ -1,11 +1,13 @@ { "catalog": [ { + "id": "dns-troubleshooting.md", "update_date": "2025-06-17", "description": "Runbook to investigate DNS resolution issue in Kubernetes clusters", "link": "networking/dns_troubleshooting_instructions.md" }, { + "id": "upgrade-troubleshooting.md", "update_date": "2025-07-08", "description": "Runbook to troubleshoot upgrade issues in Azure Kubernetes Service clusters", "link": "upgrade/upgrade_troubleshooting_instructions.md" diff --git a/holmes/plugins/toolsets/__init__.py b/holmes/plugins/toolsets/__init__.py index bfe41bd24..c623e3e1d 100644 --- a/holmes/plugins/toolsets/__init__.py +++ b/holmes/plugins/toolsets/__init__.py @@ -102,7 +102,7 @@ def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]: GitToolset(), BashExecutorToolset(), MongoDBAtlasToolset(), - RunbookToolset(), + RunbookToolset(dal=dal), AzureSQLToolset(), ServiceNowToolset(), ] diff --git a/holmes/plugins/toolsets/runbook/runbook_fetcher.py b/holmes/plugins/toolsets/runbook/runbook_fetcher.py index 554468526..0ae267d0a 100644 --- a/holmes/plugins/toolsets/runbook/runbook_fetcher.py +++ b/holmes/plugins/toolsets/runbook/runbook_fetcher.py @@ -2,7 +2,7 @@ import os import textwrap from typing import Any, Dict, List, Optional - +from holmes.core.supabase_dal import 
SupabaseDal from holmes.core.tools import ( StructuredToolResult, Tool, @@ -21,24 +21,23 @@ from holmes.plugins.toolsets.utils import toolset_name_for_one_liner -# TODO(mainred): currently we support fetch runbooks hosted internally, in the future we may want to support fetching -# runbooks from external sources as well. class RunbookFetcher(Tool): toolset: "RunbookToolset" available_runbooks: List[str] = [] additional_search_paths: Optional[List[str]] = None + _dal: Optional[SupabaseDal] = None def __init__( self, toolset: "RunbookToolset", additional_search_paths: Optional[List[str]] = None, + dal: Optional[SupabaseDal] = None, ): - catalog = load_runbook_catalog() + catalog = load_runbook_catalog(dal=dal) available_runbooks = [] if catalog: - available_runbooks = [entry.link for entry in catalog.catalog] + available_runbooks = catalog.list_available_runbooks() - # If additional search paths are configured (e.g., for testing), also scan those for .md files if additional_search_paths: for search_path in additional_search_paths: if not os.path.isdir(search_path): @@ -46,17 +45,16 @@ def __init__( for file in os.listdir(search_path): if file.endswith(".md") and file not in available_runbooks: - available_runbooks.append(file) + available_runbooks.append(f"{file}") - # Build description with available runbooks runbook_list = ", ".join([f'"{rb}"' for rb in available_runbooks]) super().__init__( name="fetch_runbook", description="Get runbook content by runbook link. Use this to get troubleshooting steps for incidents", parameters={ - "link": ToolParameter( - description=f"The link to the runbook (non-empty string required). Must be one of: {runbook_list}", + "runbook_id": ToolParameter( + description=f"The runbook_id: either a UUID or a .md filename. 
Must be one of: {runbook_list}", type="string", required=True, ), @@ -65,11 +63,14 @@ def __init__( available_runbooks=available_runbooks, # type: ignore[call-arg] additional_search_paths=additional_search_paths, # type: ignore[call-arg] ) + self._dal = dal def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult: - link: str = params.get("link", "") + runbook_id: str = params.get("runbook_id", "") + is_md_file: bool = True if runbook_id.endswith(".md") else False + # Validate link is not empty - if not link or not link.strip(): + if not runbook_id or not runbook_id.strip(): err_msg = ( "Runbook link cannot be empty. Please provide a valid runbook path." ) @@ -80,23 +81,52 @@ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolRes params=params, ) - # Build list of allowed search paths - search_paths = [DEFAULT_RUNBOOK_SEARCH_PATH] - if self.additional_search_paths: - search_paths.extend(self.additional_search_paths) - - # Validate link is in the available runbooks list OR is a valid path within allowed directories - if link not in self.available_runbooks: - # For links not in the catalog, perform strict path validation - if not link.endswith(".md"): - err_msg = f"Invalid runbook link '{link}'. Must end with .md extension." + if is_md_file: + return self._get_md_runbook(runbook_id, params) + else: + return self._get_robusta_runbook(runbook_id, params) + + def _get_robusta_runbook(self, link: str, params: dict) -> StructuredToolResult: + if self._dal and self._dal.enabled: + try: + runbook_content = self._dal.get_runbook_content(link) + if runbook_content: + return StructuredToolResult( + status=StructuredToolResultStatus.SUCCESS, + data=runbook_content.pretty(), + params=params, + ) + else: + err_msg = f"Runbook with UUID '{link}' not found in remote storage." 
+ logging.error(err_msg) + return StructuredToolResult( + status=StructuredToolResultStatus.ERROR, + error=err_msg, + params=params, + ) + except Exception as e: + err_msg = f"Failed to fetch runbook with UUID '{link}': {str(e)}" logging.error(err_msg) return StructuredToolResult( status=StructuredToolResultStatus.ERROR, error=err_msg, params=params, ) + else: + err_msg = "Runbook link appears to be a UUID, but no remote data access layer (dal) is enabled." + logging.error(err_msg) + return StructuredToolResult( + status=StructuredToolResultStatus.ERROR, + error=err_msg, + params=params, + ) + def _get_md_runbook(self, link: str, params: dict) -> StructuredToolResult: + search_paths = [DEFAULT_RUNBOOK_SEARCH_PATH] + if self.additional_search_paths: + search_paths.extend(self.additional_search_paths) + # Validate link is in the available runbooks list OR is a valid path within allowed directories + if link not in self.available_runbooks: # Check if the link would resolve to a valid path within allowed directories # This prevents path traversal attacks like ../../secret.md is_valid_path = False @@ -125,7 +155,6 @@ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolRes ) runbook_path = get_runbook_by_path(link, search_paths) - if runbook_path is None: err_msg = ( f"Runbook '{link}' not found in any of the search paths: {search_paths}" @@ -136,8 +165,6 @@ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolRes error=err_msg, params=params, ) - - # Read and return the runbook content try: with open(runbook_path, "r") as file: content = file.read() @@ -190,12 +217,16 @@ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolRes ) def get_parameterized_one_liner(self, params) -> str: - path: str = params.get("link", "") + path: str = params.get("runbook_id", "") return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Runbook {path}" class RunbookToolset(Toolset): - def __init__(self, 
additional_search_paths: Optional[List[str]] = None): + def __init__( + self, + dal: Optional[SupabaseDal], + additional_search_paths: Optional[List[str]] = None, + ): # Store additional search paths in config for RunbookFetcher to access config = {} if additional_search_paths: @@ -206,7 +237,7 @@ def __init__(self, additional_search_paths: Optional[List[str]] = None): description="Fetch runbooks", icon_url="https://platform.robusta.dev/demos/runbook.svg", tools=[ - RunbookFetcher(self, additional_search_paths), + RunbookFetcher(self, additional_search_paths, dal), ], docs_url="https://holmesgpt.dev/data-sources/", tags=[ diff --git a/holmes/utils/global_instructions.py b/holmes/utils/global_instructions.py index 48a2a3616..73cea9566 100644 --- a/holmes/utils/global_instructions.py +++ b/holmes/utils/global_instructions.py @@ -1,20 +1,85 @@ -from typing import List, Optional - +from typing import Optional, List, TYPE_CHECKING from pydantic import BaseModel +from holmes.plugins.prompts import load_and_render_prompt +from holmes.plugins.runbooks import RunbookCatalog + +if TYPE_CHECKING: + from holmes.core.resource_instruction import ResourceInstructions class Instructions(BaseModel): instructions: List[str] = [] -def add_global_instructions_to_user_prompt( - user_prompt: str, global_instructions: Optional[Instructions] +def _format_instructions_block( + items: List[str], header: str = "My instructions to check:" +) -> str: + lines = [f"* {s}" for s in items if isinstance(s, str) and s.strip()] + if not lines: + return "" + bullets = "\n".join(lines) + "\n" + return f"{header}\n{bullets}" + + +def _format_resource_instructions( + resource_instructions: Optional["ResourceInstructions"], +) -> List[str]: # type: ignore + items = [] + if resource_instructions is not None: + if getattr(resource_instructions, "instructions", None): + items.extend(resource_instructions.instructions) + if getattr(resource_instructions, "documents", None): + for document in 
resource_instructions.documents: + items.append(f"fetch information from this URL: {document.url}") + return items + + +def add_runbooks_to_user_prompt( + user_prompt: str, + runbook_catalog: Optional[RunbookCatalog], + global_instructions: Optional[Instructions] = None, + issue_instructions: Optional[List[str]] = None, + resource_instructions: Optional["ResourceInstructions"] = None, # type: ignore ) -> str: if ( - global_instructions - and global_instructions.instructions - and len(global_instructions.instructions[0]) > 0 + not runbook_catalog + and not issue_instructions + and not resource_instructions + and not global_instructions ): - instructions = "\n\n".join(global_instructions.instructions) - user_prompt += f"\n\nGlobal Instructions (use if relevant): {instructions}\n" - return user_prompt + return user_prompt + + catalog_str = runbook_catalog.to_prompt_string() if runbook_catalog else "" + + # Combine and format all instructions + combined_instructions = [] + if issue_instructions: + combined_instructions.extend(issue_instructions) + combined_instructions.extend(_format_resource_instructions(resource_instructions)) + issue_block = ( + _format_instructions_block(combined_instructions) + if combined_instructions + else "" + ) + + gi_list = getattr(global_instructions, "instructions", None) or [] + global_block = ( + _format_instructions_block( + [s for s in gi_list if isinstance(s, str)], header="" + ) + if gi_list + else "" + ) + + rendered = load_and_render_prompt( + "builtin://_runbook_instructions.jinja2", + context={ + "runbook_catalog": catalog_str, + "custom_instructions": issue_block, + "global_instructions": global_block, + }, + ) + + if user_prompt and not user_prompt.endswith("\n"): + user_prompt += "\n" + return f"{user_prompt}\n{rendered}" diff --git a/server.py b/server.py index 26066971a..047f5b1a2 100644 --- a/server.py +++ b/server.py @@ -55,7 +55,7 @@ from holmes.core.investigation_structured_output import clear_json_markdown from 
holmes.plugins.prompts import load_and_render_prompt from holmes.utils.holmes_sync_toolsets import holmes_sync_toolsets_status -from holmes.utils.global_instructions import add_global_instructions_to_user_prompt +from holmes.utils.global_instructions import add_runbooks_to_user_prompt def init_logging(): @@ -144,11 +144,13 @@ async def log_requests(request: Request, call_next): @app.post("/api/investigate") def investigate_issues(investigate_request: InvestigateRequest): try: + runbooks = config.get_runbook_catalog() result = investigation.investigate_issues( investigate_request=investigate_request, dal=dal, config=config, model=investigate_request.model, + runbooks=runbooks, ) return result @@ -191,6 +193,7 @@ def stream_investigate_issues(req: InvestigateRequest): @app.post("/api/workload_health_check") def workload_health_check(request: WorkloadHealthRequest): try: + runbooks = config.get_runbook_catalog() resource = request.resource workload_alerts: list[str] = [] if request.alert_history: @@ -198,23 +201,21 @@ def workload_health_check(request: WorkloadHealthRequest): resource, request.alert_history_since_hours ) - instructions = request.instructions or [] + issue_instructions = request.instructions or [] + stored_instructions = None if request.stored_instrucitons: stored_instructions = dal.get_resource_instructions( resource.get("kind", "").lower(), resource.get("name") ) - if stored_instructions: - instructions.extend(stored_instructions.instructions) - - nl = "\n" - if instructions: - request.ask = f"{request.ask}\n My instructions for the investigation '''{nl.join(instructions)}'''" global_instructions = dal.get_global_instructions_for_account() - request.ask = add_global_instructions_to_user_prompt( - request.ask, global_instructions + request.ask = add_runbooks_to_user_prompt( + user_prompt=request.ask, + runbook_catalog=runbooks, + global_instructions=global_instructions, + issue_instructions=issue_instructions, + 
resource_instructions=stored_instructions, ) - ai = config.create_toolcalling_llm(dal=dal, model=request.model) system_prompt = load_and_render_prompt( @@ -224,6 +225,7 @@ def workload_health_check(request: WorkloadHealthRequest): "toolsets": ai.tool_executor.toolsets, "response_format": workload_health_structured_output, "cluster_name": config.cluster_name, + "runbooks_enabled": True if runbooks else False, }, ) @@ -239,7 +241,7 @@ def workload_health_check(request: WorkloadHealthRequest): return InvestigationResult( analysis=ai_call.result, tool_calls=ai_call.tool_calls, - instructions=instructions, + instructions=issue_instructions, metadata=ai_call.metadata, ) except AuthenticationError as e: @@ -285,6 +287,7 @@ def workload_health_conversation( @app.post("/api/issue_chat") def issue_conversation(issue_chat_request: IssueChatRequest): try: + runbooks = config.get_runbook_catalog() ai = config.create_toolcalling_llm(dal=dal, model=issue_chat_request.model) global_instructions = dal.get_global_instructions_for_account() @@ -293,6 +296,7 @@ def issue_conversation(issue_chat_request: IssueChatRequest): ai=ai, config=config, global_instructions=global_instructions, + runbooks=runbooks, ) llm_call = ai.messages_call(messages=messages) @@ -324,6 +328,7 @@ def already_answered(conversation_history: Optional[List[dict]]) -> bool: @app.post("/api/chat") def chat(chat_request: ChatRequest): try: + runbooks = config.get_runbook_catalog() ai = config.create_toolcalling_llm(dal=dal, model=chat_request.model) global_instructions = dal.get_global_instructions_for_account() messages = build_chat_messages( @@ -333,6 +338,7 @@ def chat(chat_request: ChatRequest): config=config, global_instructions=global_instructions, additional_system_prompt=chat_request.additional_system_prompt, + runbooks=runbooks, ) follow_up_actions = [] diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/app/payments-deployment.yaml 
b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/app/payments-deployment.yaml new file mode 100644 index 000000000..e0aa103a2 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/app/payments-deployment.yaml @@ -0,0 +1,80 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: payments-service + namespace: app-162 + labels: + app: payments-service +spec: + replicas: 1 + selector: + matchLabels: + app: payments-service + template: + metadata: + labels: + app: payments-service + spec: + containers: + - name: payments-service + image: python:3.11-slim + command: ["python", "/app/payments_service.py"] + ports: + - containerPort: 8080 + env: + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: payments-secrets + key: database-url + - name: STRIPE_API_KEY + valueFrom: + secretKeyRef: + name: payments-secrets + key: stripe-api-key + - name: PORT + value: "8080" + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "256Mi" + cpu: "200m" + volumeMounts: + - name: app-code + mountPath: /app + readOnly: true + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + volumes: + - name: app-code + secret: + secretName: payments-secrets + items: + - key: payments_service.py + path: payments_service.py + mode: 0755 +--- +apiVersion: v1 +kind: Service +metadata: + name: payments-service + namespace: app-162 +spec: + selector: + app: payments-service + ports: + - port: 8080 + targetPort: 8080 + type: ClusterIP diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/app/secrets.yaml b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/app/secrets.yaml new file mode 100644 index 000000000..6c9bcb49a --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/app/secrets.yaml @@ -0,0 +1,277 @@ +apiVersion: v1 +kind: Secret +metadata: + name: signup-secrets + 
namespace: app-162 +type: Opaque +stringData: + database-url: "postgresql://user:pass@postgres:5432/signupdb" + signup_service.py: | + #!/usr/bin/env python3 + import os + import sys + import time + import json + import logging + from http.server import HTTPServer, BaseHTTPRequestHandler + import urllib.request + import urllib.error + + # Configure logging + logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + logger = logging.getLogger(__name__) + + class SignupHandler(BaseHTTPRequestHandler): + def do_GET(self): + if self.path == '/health': + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({"status": "healthy"}).encode()) + elif self.path == '/ready': + # Check if we can connect to payments service + try: + payments_url = os.getenv('PAYMENTS_SERVICE_URL', 'http://payments-service:8080') + response = urllib.request.urlopen(f"{payments_url}/health", timeout=5) + if response.status == 200: + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({"status": "ready"}).encode()) + else: + self.send_response(503) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({"status": "not ready", "reason": "payments service unhealthy"}).encode()) + except Exception as e: + logger.error(f"Payments service check failed: {e}") + self.send_response(503) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({"status": "not ready", "reason": f"payments service unreachable: {str(e)}"}).encode()) + else: + self.send_response(404) + self.end_headers() + + def do_POST(self): + if self.path == '/signup': + try: + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + user_data = json.loads(post_data.decode('utf-8')) + + # Simulate signup process 
+ logger.info(f"Processing signup for user: {user_data.get('email', 'unknown')}") + + # Check payments service availability + payments_url = os.getenv('PAYMENTS_SERVICE_URL', 'http://payments-service:8080') + try: + # Create payment intent + payment_data = { + "amount": 1000, # $10.00 in cents + "currency": "usd", + "customer_email": user_data.get('email') + } + + req = urllib.request.Request( + f"{payments_url}/create-payment-intent", + data=json.dumps(payment_data).encode(), + headers={'Content-Type': 'application/json'} + ) + + response = urllib.request.urlopen(req, timeout=10) + payment_response = json.loads(response.read().decode()) + + if response.status == 200: + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({ + "status": "success", + "message": "User registered successfully", + "payment_intent_id": payment_response.get('id') + }).encode()) + logger.info("Signup completed successfully") + else: + raise Exception(f"Payment service returned status {response.status}") + + except urllib.error.HTTPError as e: + logger.error(f"HTTP error calling payments service: {e}") + self.send_response(502) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({ + "status": "error", + "message": "Payment service error", + "details": str(e) + }).encode()) + except Exception as e: + logger.error(f"Error calling payments service: {e}") + self.send_response(502) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({ + "status": "error", + "message": "Payment service unavailable", + "details": str(e) + }).encode()) + + except Exception as e: + logger.error(f"Signup processing error: {e}") + self.send_response(500) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({ + "status": "error", + "message": "Internal server error", + "details": 
str(e) + }).encode()) + else: + self.send_response(404) + self.end_headers() + + def main(): + port = int(os.getenv('PORT', 8080)) + server = HTTPServer(('0.0.0.0', port), SignupHandler) + logger.info(f"Signup service starting on port {port}") + + try: + server.serve_forever() + except KeyboardInterrupt: + logger.info("Shutting down signup service") + server.shutdown() + + if __name__ == '__main__': + main() +--- +apiVersion: v1 +kind: Secret +metadata: + name: payments-secrets + namespace: app-162 +type: Opaque +stringData: + database-url: "postgresql://user:pass@postgres:5432/paymentsdb" + stripe-api-key: "sk_test_invalid_key" # This will cause the payments service to fail + payments_service.py: | + #!/usr/bin/env python3 + import os + import sys + import time + import json + import logging + from http.server import HTTPServer, BaseHTTPRequestHandler + + # Configure logging + logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + logger = logging.getLogger(__name__) + + class PaymentsHandler(BaseHTTPRequestHandler): + def do_GET(self): + if self.path == '/health': + # Check if Stripe API key is valid + stripe_key = os.getenv('STRIPE_API_KEY', '') + if not stripe_key or stripe_key == 'sk_test_invalid_key': + self.send_response(503) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({ + "status": "unhealthy", + "reason": "Invalid Stripe API key" + }).encode()) + logger.error("Health check failed: Invalid Stripe API key") + else: + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({"status": "healthy"}).encode()) + elif self.path == '/ready': + # Same logic as health check + stripe_key = os.getenv('STRIPE_API_KEY', '') + if not stripe_key or stripe_key == 'sk_test_invalid_key': + self.send_response(503) + self.send_header('Content-type', 'application/json') + self.end_headers() + 
self.wfile.write(json.dumps({ + "status": "not ready", + "reason": "Invalid Stripe API key" + }).encode()) + else: + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({"status": "ready"}).encode()) + else: + self.send_response(404) + self.end_headers() + + def do_POST(self): + if self.path == '/create-payment-intent': + try: + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + payment_data = json.loads(post_data.decode('utf-8')) + + # Check Stripe API key + stripe_key = os.getenv('STRIPE_API_KEY', '') + if not stripe_key or stripe_key == 'sk_test_invalid_key': + logger.error("Payment intent creation failed: Invalid Stripe API key") + self.send_response(500) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({ + "status": "error", + "message": "Invalid Stripe API key", + "details": "The Stripe API key is invalid or missing" + }).encode()) + return + + # Simulate payment intent creation + logger.info(f"Creating payment intent for amount: {payment_data.get('amount')}") + + # Simulate Stripe API call failure due to invalid key + self.send_response(500) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({ + "status": "error", + "message": "Stripe API authentication failed", + "details": "Invalid API key provided to Stripe" + }).encode()) + + except Exception as e: + logger.error(f"Payment intent creation error: {e}") + self.send_response(500) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps({ + "status": "error", + "message": "Internal server error", + "details": str(e) + }).encode()) + else: + self.send_response(404) + self.end_headers() + + def main(): + port = int(os.getenv('PORT', 8080)) + server = HTTPServer(('0.0.0.0', port), PaymentsHandler) + logger.info(f"Payments 
service starting on port {port}") + + # Log the Stripe API key status (without exposing the actual key) + stripe_key = os.getenv('STRIPE_API_KEY', '') + if not stripe_key: + logger.warning("STRIPE_API_KEY environment variable not set") + elif stripe_key == 'sk_test_invalid_key': + logger.error("STRIPE_API_KEY is set to invalid test key - service will fail") + else: + logger.info("STRIPE_API_KEY is set (key starts with: %s...)", stripe_key[:10]) + + try: + server.serve_forever() + except KeyboardInterrupt: + logger.info("Shutting down payments service") + server.shutdown() + + if __name__ == '__main__': + main() diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/app/signup-deployment.yaml b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/app/signup-deployment.yaml new file mode 100644 index 000000000..0b0b32095 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/app/signup-deployment.yaml @@ -0,0 +1,77 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: signup-service + namespace: app-162 + labels: + app: signup-service +spec: + replicas: 2 + selector: + matchLabels: + app: signup-service + template: + metadata: + labels: + app: signup-service + spec: + containers: + - name: signup-service + image: python:3.11-slim + ports: + - containerPort: 8080 + command: ["python", "/app/signup_service.py"] + env: + - name: PAYMENTS_SERVICE_URL + value: "http://payments-service:8080" + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: signup-secrets + key: database-url + - name: PORT + value: "8080" + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "256Mi" + cpu: "200m" + volumeMounts: + - name: app-code + mountPath: /app + readOnly: true + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + volumes: + - name: app-code + secret: + secretName: 
signup-secrets + items: + - key: signup_service.py + path: signup_service.py + mode: 0755 +--- +apiVersion: v1 +kind: Service +metadata: + name: signup-service + namespace: app-162 +spec: + selector: + app: signup-service + ports: + - port: 8080 + targetPort: 8080 + type: ClusterIP diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/global_instructions.json b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/global_instructions.json new file mode 100644 index 000000000..6d9bf4673 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/global_instructions.json @@ -0,0 +1,9 @@ +{ + "instructions": [ + "Always check service dependencies first", + "Verify environment variables are set correctly", + "Check logs for error patterns before investigating further", + "Contact the interlock team on all issues that could be related to STRIPE or the Stripe API", + "On all other issues contact the capricorn team" + ] +} diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/issue_data.json b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/issue_data.json new file mode 100644 index 000000000..0967ef424 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/issue_data.json @@ -0,0 +1 @@ +{} diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/resource_instructions.json b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/resource_instructions.json new file mode 100644 index 000000000..0967ef424 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/resource_instructions.json @@ -0,0 +1 @@ +{} diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_catalog.json b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_catalog.json new file mode 100644 index 000000000..8318def4e --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_catalog.json @@ -0,0 +1,17 @@ +[ + { + "id": "8fe8e24d-6b53-47a6-92a5-b389938fa823", + "symptom": "Kafka lag issues, 
consumer falling behind", + "title": "Kafka Topic and Application Mapping" + }, + { + "id": "40296b5c-2cb5-41df-b1f5-93441f894c44", + "symptom": "Pod continuously crashing and restarting", + "title": "Pod Crashlooping Debugging Runbook" + }, + { + "id": "b2ffd311-f339-46a2-b379-1e53eb0bc1ed", + "symptom": "Signup service failing, users can't register", + "title": "Signup Service Debugging Runbook" + } +] diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_content_40296b5c-2cb5-41df-b1f5-93441f894c44.json b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_content_40296b5c-2cb5-41df-b1f5-93441f894c44.json new file mode 100644 index 000000000..74c64d6d9 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_content_40296b5c-2cb5-41df-b1f5-93441f894c44.json @@ -0,0 +1,6 @@ +{ + "id": "40296b5c-2cb5-41df-b1f5-93441f894c44", + "symptom": "Pod continuously crashing and restarting", + "title": "Pod Crashlooping Debugging Runbook", + "instruction": "# Pod Crashlooping Debugging Runbook\n\n## Overview\nThis runbook provides step-by-step instructions for debugging pods that are crashlooping (continuously crashing and restarting).\n\n## Step 1: Check Pod Status\n```bash\nkubectl get pods -n \nkubectl describe pod -n \n```\n\nLook for:\n- Restart count\n- Current status (CrashLoopBackOff, Error, etc.)\n- Events section for error messages\n\n## Step 2: Check Pod Logs\n```bash\nkubectl logs -n \nkubectl logs -n --previous # Previous container logs\n```\n\nCommon issues to look for:\n- Application startup errors\n- Configuration file issues\n- Database connection failures\n- Missing environment variables\n\n## Step 3: Check Resource Constraints\n```bash\nkubectl top pod -n \nkubectl describe pod -n | grep -A 5 -B 5 \"Limits\\|Requests\"\n```\n\nCheck for:\n- Memory limits too low\n- CPU limits too restrictive\n- Resource quotas exceeded\n\n## Step 4: Check Configuration\n```bash\nkubectl get pod -n -o yaml\n```\n\nVerify:\n- 
Environment variables are set correctly\n- Volume mounts are working\n- ConfigMaps and Secrets are properly referenced\n\n## Step 5: Check Dependencies\n- Database connectivity\n- External service availability\n- Network policies blocking traffic\n- Service account permissions\n\n## Common Solutions\n1. **Memory Issues**: Increase memory limits or fix memory leaks\n2. **Configuration Issues**: Fix environment variables or config files\n3. **Dependency Issues**: Ensure external services are available\n4. **Image Issues**: Check if container image is correct and accessible\n\n## Prevention\n- Set appropriate resource requests and limits\n- Use health checks (liveness and readiness probes)\n- Implement proper error handling in applications\n- Monitor resource usage and set up alerts" +} diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_content_8fe8e24d-6b53-47a6-92a5-b389938fa823.json b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_content_8fe8e24d-6b53-47a6-92a5-b389938fa823.json new file mode 100644 index 000000000..4fc1fc379 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_content_8fe8e24d-6b53-47a6-92a5-b389938fa823.json @@ -0,0 +1,6 @@ +{ + "id": "8fe8e24d-6b53-47a6-92a5-b389938fa823", + "symptom": "Kafka lag issues, consumer falling behind", + "title": "Kafka Topic and Application Mapping", + "instruction": "# Kafka Topic and Application Mapping\n\n## Topics and Data Flow\n\n### `finance` Topic\n**Purpose**: Order processing pipeline\n**Producer**: `orders-app`\n**Consumer**: `invoices-app`\n\n- **orders-app** generates customer orders\n- **invoices-app** consumes orders\n\n### `payments` Topic\n**Purpose**: Payment processing pipeline\n**Producer**: `finance-app`\n**Consumer**: `accounting-app`\n\n- **finance-app** generates payment transactions (amounts, methods, bank codes)\n- **accounting-app** consumes payment\n\n## Topic Flow\n\n```\norders-app → finance → invoices-app\nfinance-app → 
payments → accounting-app\n```\n\nBoth topics operate independently and handle different aspects of the business workflow.\n\nWhen investigating lag, find and identify the producer and consumer pods and check their logs." +} diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_content_b2ffd311-f339-46a2-b379-1e53eb0bc1ed.json b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_content_b2ffd311-f339-46a2-b379-1e53eb0bc1ed.json new file mode 100644 index 000000000..b826871c5 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/runbook_content_b2ffd311-f339-46a2-b379-1e53eb0bc1ed.json @@ -0,0 +1,6 @@ +{ + "id": "b2ffd311-f339-46a2-b379-1e53eb0bc1ed", + "symptom": "Signup service failing, users can't register", + "title": "Signup Service Debugging Runbook", + "instruction": "# Signup Service Debugging Runbook\n\n## Overview\nWhen the signup service is experiencing issues, the problem is often related to the payments service dependency. This runbook provides systematic debugging steps.\n\n## Step 1: Check Signup Service Status\n```bash\nkubectl get pods -l app=signup-service\nkubectl describe pod \nkubectl logs \n```\n\nLook for:\n- Pod status and restart count\n- Error messages in logs\n- Connection timeouts to payments service\n\n## Step 2: Verify Payments Service Health\n**CRITICAL**: Always check the payments service first when signup issues occur.\n\n```bash\nkubectl get pods -l app=payments-service\nkubectl describe pod \nkubectl logs \n```\n\nCommon payments service issues that affect signup:\n- Invalid Stripe API key causing authentication failures\n- Database connection issues in payments service\n- Payments service not responding to health checks\n- Resource constraints causing payments service to be unstable\n\n## Step 3: Test Service-to-Service Communication\n```bash\n# Test if signup can reach payments service\nkubectl exec -it -- curl -v http://payments-service:8080/health\n\n# Check if payments service is 
responding\nkubectl port-forward svc/payments-service 8080:8080\ncurl http://localhost:8080/health\n```\n\n## Step 4: Check Payments Service Specific Issues\n\n### Stripe API Key Issues\n```bash\nkubectl logs <payments-pod-name> | grep -i stripe\nkubectl logs <payments-pod-name> | grep -i \"api key\"\n```\n\nLook for:\n- \"Invalid API key\" errors\n- \"Authentication failed\" messages\n- Stripe connection timeouts\n\n### Database Connection Issues\n```bash\nkubectl logs <payments-pod-name> | grep -i database\nkubectl logs <payments-pod-name> | grep -i postgres\n```\n\n## Step 5: Verify Environment Variables\n```bash\nkubectl exec <payments-pod-name> -- env | grep STRIPE\nkubectl exec <payments-pod-name> -- env | grep DATABASE\n```\n\n## Common Root Causes\n1. **Invalid Stripe API Key**: Payments service fails to initialize, causing signup to fail\n2. **Payments Service Crashlooping**: Due to database connectivity or API key issues\n3. **Network Issues**: Service-to-service communication problems\n4. **Resource Constraints**: Payments service running out of memory/CPU\n\n## Resolution Steps\n1. **Fix Stripe API Key**: Update the STRIPE_API_KEY environment variable\n2. **Restart Payments Service**: `kubectl rollout restart deployment/payments-service`\n3. **Check Database**: Ensure PostgreSQL is accessible from payments service\n4. **Scale Payments Service**: If resource constrained, increase replicas or resources\n\n## Prevention\n- Monitor payments service health independently\n- Set up alerts for payments service failures\n- Use proper health checks and circuit breakers\n- Implement retry logic in signup service for payments calls" +} diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/test_case.yaml new file mode 100644 index 000000000..0f826fe53 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/test_case.yaml @@ -0,0 +1,31 @@ +user_prompt: "My signup service is failing and users can't register. What should I check to debug this issue?" 
+tags: + - kubernetes + - runbooks + +# simulates loading runbooks and global instructions from a cluster environment +test_type: cluster + +before_test: | + # Create namespace + kubectl create namespace app-162 + + # Deploy the scenario + kubectl apply -f app/secrets.yaml + kubectl apply -f app/signup-deployment.yaml + kubectl apply -f app/payments-deployment.yaml + + # Wait for deployments to be ready + kubectl wait --for=condition=available --timeout=60s deployment/signup-service -n app-162 + kubectl wait --for=condition=available --timeout=60s deployment/payments-service -n app-162 + + # Give services time to start and fail + sleep 10 + +after_test: | + # Delete namespace + kubectl delete namespace app-162 + +# verifies that the runbook is pulled from robusta and also global instructions are read and followed +expected_output: | + The runbook provides systematic debugging steps for signup service issues, emphasizing the need to check the payments service dependency first. The runbook should specifically mention checking Stripe API key validation in the payments service, as this is the root cause of the signup failures. Mentions contacting the interlock team for assistance. 
There is no mention of the capricorn team diff --git a/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/toolsets.yaml b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/toolsets.yaml new file mode 100644 index 000000000..243327275 --- /dev/null +++ b/tests/llm/fixtures/test_ask_holmes/162_get_runbooks/toolsets.yaml @@ -0,0 +1,31 @@ +toolsets: + kubernetes/logs: + enabled: true + kubernetes/core: + enabled: true + helm/core: + enabled: false + internet: + enabled: false + kafka/admin: + enabled: false + aws/security: + enabled: false + aws/rds: + enabled: false + aks/core: + enabled: false + kubernetes/live-metrics: + enabled: false + kubernetes/kube-prometheus-stack: + enabled: false + kubernetes/kube-lineage-extras: + enabled: false + aks/node-health: + enabled: false + docker/core: + enabled: false + kubernetes/krew-extras: + enabled: false + runbook: + enabled: true diff --git a/tests/llm/test_ask_holmes.py b/tests/llm/test_ask_holmes.py index e16706983..9d893d744 100644 --- a/tests/llm/test_ask_holmes.py +++ b/tests/llm/test_ask_holmes.py @@ -6,7 +6,7 @@ from pathlib import Path from unittest.mock import patch from datetime import datetime - +from holmes.plugins.runbooks import load_runbook_catalog from rich.console import Console from holmes.core.models import ChatRequest from holmes.core.tracing import TracingFactory @@ -29,6 +29,7 @@ from holmes.core.prompt import build_initial_ask_messages from tests.llm.utils.retry_handler import retry_on_throttle +from tests.llm.utils.mock_dal import load_mock_dal from tests.llm.utils.property_manager import ( set_initial_properties, @@ -208,9 +209,6 @@ def ask_holmes( if test_case.runbooks is not None: runbooks = test_case.runbooks else: - # Load default system runbooks - from holmes.plugins.runbooks import load_runbook_catalog - runbook_catalog = load_runbook_catalog() runbooks = runbook_catalog.model_dump() if runbook_catalog else {} messages = build_initial_ask_messages( @@ -226,11 +224,19 @@ def ask_holmes( if 
test_case.cluster_name: config.cluster_name = test_case.cluster_name + mock_dal = load_mock_dal( + Path(test_case.folder), generate_mocks=False, initialize_base=False + ) + runbooks = load_runbook_catalog(mock_dal) + global_instructions = mock_dal.get_global_instructions_for_account() + messages = build_chat_messages( ask=chat_request.ask, conversation_history=test_case.conversation_history, ai=ai, config=config, + global_instructions=global_instructions, + runbooks=runbooks, ) # Create LLM completion trace within current context diff --git a/tests/llm/utils/mock_dal.py b/tests/llm/utils/mock_dal.py index f9c52d616..bc2893142 100644 --- a/tests/llm/utils/mock_dal.py +++ b/tests/llm/utils/mock_dal.py @@ -2,12 +2,14 @@ import json import logging from pathlib import Path -from typing import Dict, Optional +from typing import Dict, List, Optional from pydantic import TypeAdapter from holmes.core.supabase_dal import SupabaseDal -from holmes.core.tool_calling_llm import Instructions, ResourceInstructions +from holmes.core.tool_calling_llm import ResourceInstructions +from holmes.plugins.runbooks import RobustaRunbookInstruction +from holmes.utils.global_instructions import Instructions from tests.llm.utils.test_case_utils import read_file @@ -18,8 +20,17 @@ def __init__( issue_data: Optional[Dict], resource_instructions: Optional[ResourceInstructions], generate_mocks: bool, + initialize_base: bool = True, ): - super().__init__(cluster="test") + if initialize_base: + super().__init__(cluster="test") + else: + # For only using mock data without initializing the base class + # Don't call super().__init__ to avoid initializing Supabase connection + # Set necessary attributes that would normally be set by SupabaseDal.__init__ + self.enabled = True + self.cluster = "test" + self._issue_data = issue_data self._resource_instructions = resource_instructions self._test_case_folder = test_case_folder @@ -63,10 +74,48 @@ def get_resource_instructions( return data + def 
get_runbook_catalog(self) -> Optional[List[RobustaRunbookInstruction]]: + # Try to read from mock file first + mock_file_path = self._get_mock_file_path("runbook_catalog") + if mock_file_path.exists(): + try: + with open(mock_file_path, "r") as f: + data = json.load(f) + if isinstance(data, list): + return [RobustaRunbookInstruction(**item) for item in data] + return None + except Exception as e: + logging.warning(f"Failed to read runbook catalog mock file: {e}") + return None + + def get_runbook_content( + self, runbook_id: str + ) -> Optional[RobustaRunbookInstruction]: + # Try to read from mock file first + mock_file_path = self._get_mock_file_path(f"runbook_content_{runbook_id}") + if mock_file_path.exists(): + try: + with open(mock_file_path, "r") as f: + data = json.load(f) + return RobustaRunbookInstruction(**data) + except Exception as e: + logging.warning(f"Failed to read runbook content mock file: {e}") + return None + def _get_mock_file_path(self, entity_type: str) -> Path: return self._test_case_folder / f"{entity_type}.json" def get_global_instructions_for_account(self) -> Optional[Instructions]: + # Try to read from mock file first + mock_file_path = self._get_mock_file_path("global_instructions") + if mock_file_path.exists(): + try: + with open(mock_file_path, "r") as f: + data = json.load(f) + return Instructions(**data) + except Exception as e: + logging.warning(f"Failed to read global instructions mock file: {e}") + return None def get_workload_issues(self, *args) -> list: @@ -74,9 +123,12 @@ def get_workload_issues(self, *args) -> list: pydantic_resource_instructions = TypeAdapter(ResourceInstructions) +pydantic_instructions = TypeAdapter(Instructions) -def load_mock_dal(test_case_folder: Path, generate_mocks: bool): +def load_mock_dal( + test_case_folder: Path, generate_mocks: bool, initialize_base: bool = True +): issue_data_mock_path = test_case_folder.joinpath(Path("issue_data.json")) issue_data = None if issue_data_mock_path.exists(): @@ 
-96,4 +148,5 @@ def load_mock_dal(test_case_folder: Path, generate_mocks: bool): issue_data=issue_data, resource_instructions=resource_instructions, generate_mocks=generate_mocks, + initialize_base=initialize_base, ) diff --git a/tests/llm/utils/mock_toolset.py b/tests/llm/utils/mock_toolset.py index 55098f41b..8769e49b3 100644 --- a/tests/llm/utils/mock_toolset.py +++ b/tests/llm/utils/mock_toolset.py @@ -10,6 +10,8 @@ import threading from pydantic import BaseModel import pytest +from tests.llm.utils.mock_dal import load_mock_dal +from pathlib import Path from holmes.core.tools import ( StructuredToolResult, @@ -677,6 +679,11 @@ def _configure_toolsets( f"Available toolsets: {', '.join(sorted(builtin_names))}" ) + mock_dal = load_mock_dal( + test_case_folder=Path(self.test_case_folder), + generate_mocks=self.mock_generation_config.generate_mocks, + initialize_base=False, + ) for toolset in builtin_toolsets: # Replace RunbookToolset with one that has test folder search path if toolset.name == "runbook": @@ -686,7 +693,7 @@ def _configure_toolsets( # Create new RunbookToolset with test folder as additional search path new_runbook_toolset = RunbookToolset( - additional_search_paths=[self.test_case_folder] + dal=mock_dal, additional_search_paths=[self.test_case_folder] ) new_runbook_toolset.enabled = toolset.enabled new_runbook_toolset.status = toolset.status diff --git a/tests/plugins/prompt/test_generic_ask_conversation.py b/tests/plugins/prompt/test_generic_ask_conversation.py index 68dab5c04..3a0ff0331 100644 --- a/tests/plugins/prompt/test_generic_ask_conversation.py +++ b/tests/plugins/prompt/test_generic_ask_conversation.py @@ -1,6 +1,5 @@ from holmes.core.tools import ToolsetStatusEnum from holmes.plugins.prompts import load_and_render_prompt -from holmes.plugins.runbooks import load_runbook_catalog from holmes.plugins.toolsets.prometheus.prometheus import PrometheusToolset @@ -34,14 +33,6 @@ def test_prometheus_prompt_inclusion(): assert "Use prometheus to 
execute promql queries" not in rendered -def test_runbook_prompt(): - template = "builtin://generic_ask.jinja2" - context = {"runbooks": load_runbook_catalog()} - rendered = load_and_render_prompt(template, context) - assert "## Available Runbooks" in rendered - assert "### description:" in rendered - - def test_runbook_empty_prompt(): template = "builtin://generic_ask.jinja2" context = {"runbooks": None} diff --git a/tests/plugins/toolsets/test_runbook.py b/tests/plugins/toolsets/test_runbook.py index c0544d4d8..2fdda8b1e 100644 --- a/tests/plugins/toolsets/test_runbook.py +++ b/tests/plugins/toolsets/test_runbook.py @@ -7,15 +7,19 @@ def test_RunbookFetcher(): - runbook_fetch_tool = RunbookFetcher(RunbookToolset()) + runbook_fetch_tool = RunbookFetcher(RunbookToolset(dal=None)) result = runbook_fetch_tool._invoke( - {"link": "wrong_runbook_path"}, context=create_mock_tool_invoke_context() + {"runbook_id": "wrong_runbook_path.md", "type": "md_file"}, + context=create_mock_tool_invoke_context(), ) assert result.status == StructuredToolResultStatus.ERROR assert result.error is not None result = runbook_fetch_tool._invoke( - {"link": "networking/dns_troubleshooting_instructions.md"}, + { + "runbook_id": "networking/dns_troubleshooting_instructions.md", + "type": "md_file", + }, context=create_mock_tool_invoke_context(), ) @@ -24,7 +28,10 @@ def test_RunbookFetcher(): assert result.data is not None assert ( runbook_fetch_tool.get_parameterized_one_liner( - {"link": "networking/dns_troubleshooting_instructions.md"} + { + "runbook_id": "networking/dns_troubleshooting_instructions.md", + "type": "md_file", + } ) == "Runbook: Fetch Runbook networking/dns_troubleshooting_instructions.md" ) diff --git a/tests/test_runbook_prompt.py b/tests/test_runbook_prompt.py new file mode 100644 index 000000000..cbbb895f7 --- /dev/null +++ b/tests/test_runbook_prompt.py @@ -0,0 +1,101 @@ +import pytest + +from types import SimpleNamespace + +from holmes.utils.global_instructions import 
add_runbooks_to_user_prompt + + +class DummyRunbookCatalog: + def to_prompt_string(self): + return "RUNBOOK CATALOG PROMPT" + + +class DummyInstructions: + def __init__(self, instructions): + self.instructions = instructions + + +@pytest.mark.parametrize( + "user_prompt,runbook_catalog,issue_instructions,resource_instructions,global_instructions,expected_substrings", + [ + # Only user_prompt + ("Prompt", None, None, None, None, ["Prompt"]), + # Only runbook_catalog + ("", DummyRunbookCatalog(), None, None, None, ["RUNBOOK CATALOG PROMPT"]), + # Only issue_instructions + ( + "", + None, + ["step 1", "step 2"], + None, + None, + ["My instructions to check", "* step 1", "* step 2"], + ), + # Only resource_instructions (with instructions and documents) + ( + "", + None, + None, + SimpleNamespace( + instructions=["do X", "do Y"], + documents=[ + SimpleNamespace(url="http://doc1"), + SimpleNamespace(url="http://doc2"), + ], + ), + None, + [ + "My instructions to check", + "* do X", + "* do Y", + "* fetch information from this URL: http://doc1", + "* fetch information from this URL: http://doc2", + ], + ), + # Only global_instructions + ( + "", + None, + None, + None, + DummyInstructions(["global 1", "global 2"]), + ["global 1", "global 2"], + ), + # All together + ( + "Prompt", + DummyRunbookCatalog(), + ["issue step"], + SimpleNamespace( + instructions=["resource step"], + documents=[SimpleNamespace(url="http://doc")], + ), + DummyInstructions(["global step"]), + [ + "Prompt", + "RUNBOOK CATALOG PROMPT", + "* issue step", + "* resource step", + "* fetch information from this URL: http://doc", + "global step", + ], + ), + ], +) +def test_add_runbooks_to_user_prompt( + user_prompt, + runbook_catalog, + issue_instructions, + resource_instructions, + global_instructions, + expected_substrings, +): + result = add_runbooks_to_user_prompt( + user_prompt=user_prompt, + runbook_catalog=runbook_catalog, + issue_instructions=issue_instructions, + 
resource_instructions=resource_instructions, + global_instructions=global_instructions, + ) + for substring in expected_substrings: + assert substring in result