diff --git a/src/aks-agent/HISTORY.rst b/src/aks-agent/HISTORY.rst
new file mode 100644
index 00000000000..f193943bdf5
--- /dev/null
+++ b/src/aks-agent/HISTORY.rst
@@ -0,0 +1,17 @@
+.. :changelog:
+
+Release History
+===============
+
+Guidance
+++++++++
+If there is no rush to release a new version, please just add a description of the modification under the *Pending* section.
+
+To release a new version, please select a new version number (usually plus 1 to last patch version, X.Y.Z -> Major.Minor.Patch, more details in `\doc <https://semver.org/>`_), and then add a new section named as the new version number in this file, the content should include the new modifications and everything from the *Pending* section. Finally, update the `VERSION` variable in `setup.py` with this new version number.
+
+Pending
++++++++
+
+1.0.0b1
++++++++
+* Add interactive AI-powered debugging tool `az aks agent`.
diff --git a/src/aks-agent/README.rst b/src/aks-agent/README.rst
new file mode 100644
index 00000000000..c813113aeec
--- /dev/null
+++ b/src/aks-agent/README.rst
@@ -0,0 +1,80 @@
+Azure CLI AKS Agent Extension
+===============================
+
+Introduction
+============
+
+The AKS Agent extension provides the "az aks agent" command, an AI-powered assistant that
+helps analyze and troubleshoot Azure Kubernetes Service (AKS) clusters using Large Language
+Models (LLMs). The agent combines cluster context, configurable toolsets, and LLMs to answer
+natural-language questions about your cluster (for example, "Why are my pods not starting?")
+and can investigate issues in both interactive and non-interactive (batch) modes.
+
+Key capabilities
+----------------
+
+- Interactive and non-interactive modes (use --no-interactive for batch runs).
+- Support for multiple LLM providers (Azure OpenAI, OpenAI, etc.) via environment variables.
+- Configurable via a JSON/YAML config file provided with --config-file.
+- Control echo and tool output visibility with --no-echo-request and --show-tool-output.
+- Refresh the available toolsets with --refresh-toolsets.
+
+Prerequisites
+-------------
+
+Before using the agent, make sure provider-specific environment variables are set. For
+example, Azure OpenAI typically requires AZURE_API_BASE, AZURE_API_VERSION, and AZURE_API_KEY,
+while OpenAI requires OPENAI_API_KEY. For more details about supported providers and required
+variables, see: https://docs.litellm.ai/docs/providers
+
+Quick start and examples
+========================
+
+Install the extension
+---------------------
+
+.. code-block:: bash
+
+ az extension add --name aks-agent
+
+Run the agent (Azure OpenAI example)
+------------------------------------
+
+.. code-block:: bash
+
+ export AZURE_API_BASE="https://my-azureopenai-service.openai.azure.com/"
+ export AZURE_API_VERSION="2025-01-01-preview"
+ export AZURE_API_KEY="sk-xxx"
+
+ az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment
+
+Run the agent (OpenAI example)
+------------------------------
+
+.. code-block:: bash
+
+ export OPENAI_API_KEY="sk-xxx"
+ az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
+
+Run in non-interactive batch mode
+---------------------------------
+
+.. code-block:: bash
+
+ az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
+
+Using a configuration file
+--------------------------
+
+Pass a config file with --config-file to predefine model, credentials, and toolsets. See
+the example config and more detailed examples in the help definition at
+`src/aks-agent/azext_aks_agent/_help.py`.
+
+More help
+---------
+
+For a complete list of parameters, detailed examples and help text, run:
+
+.. code-block:: bash
+
+ az aks agent -h
diff --git a/src/aks-agent/azext_aks_agent/__init__.py b/src/aks-agent/azext_aks_agent/__init__.py
new file mode 100644
index 00000000000..213cda1be6f
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/__init__.py
@@ -0,0 +1,36 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+
+from azure.cli.core import AzCommandsLoader
+
+# pylint: disable=unused-import
+import azext_aks_agent._help
+
+
+class ContainerServiceCommandsLoader(AzCommandsLoader):
+    """Command loader that wires up the commands and arguments of the aks-agent extension."""
+
+    def __init__(self, cli_ctx=None):
+        from azure.cli.core.commands import CliCommandType
+
+        custom_type = CliCommandType(operations_tmpl='azext_aks_agent.custom#{}')
+        super().__init__(cli_ctx=cli_ctx, custom_command_type=custom_type)
+
+    def load_command_table(self, args):
+        """Register this extension's commands and return the resulting command table."""
+        super().load_command_table(args)
+        from azext_aks_agent.commands import load_command_table as register_commands
+        register_commands(self, args)
+        return self.command_table
+
+    def load_arguments(self, command):
+        """Register the argument definitions that apply to ``command``."""
+        super().load_arguments(command)
+        from azext_aks_agent._params import load_arguments as register_arguments
+        register_arguments(self, command)
+
+
+COMMAND_LOADER_CLS = ContainerServiceCommandsLoader
diff --git a/src/aks-agent/azext_aks_agent/_consts.py b/src/aks-agent/azext_aks_agent/_consts.py
new file mode 100644
index 00000000000..5355b049263
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/_consts.py
@@ -0,0 +1,10 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+# Constants shared by the aks agent command implementation, its parameters and its validators.
+CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY = "HOLMES_CONFIGPATH_DIR"  # env var the agent sets to the Azure CLI config dir
+CONST_AGENT_NAME = "AKS AGENT"  # value exported through CONST_AGENT_NAME_ENV_KEY
+CONST_AGENT_NAME_ENV_KEY = "AGENT_NAME"  # env var the agent sets before importing holmes
+CONST_AGENT_CONFIG_FILE_NAME = "aksAgent.yaml"  # file name of the default --config-file inside the CLI config dir
diff --git a/src/aks-agent/azext_aks_agent/_help.py b/src/aks-agent/azext_aks_agent/_help.py
new file mode 100644
index 00000000000..d97a1ccedc9
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/_help.py
@@ -0,0 +1,106 @@
+# coding=utf-8
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+# pylint: disable=too-many-lines
+
+from knack.help_files import helps
+
+
+# Help content for `az aks agent`. The text below is YAML, so its indentation is significant.
+# The sample config file in the last example is YAML, hence the ```yaml fence.
+helps["aks agent"] = """
+    type: command
+    short-summary: Run AI assistant to analyze and troubleshoot Kubernetes clusters.
+    long-summary: |-
+      This command allows you to ask questions about your Azure Kubernetes cluster and get answers using AI models.
+      Environment variables must be set to use the AI model, please refer to https://docs.litellm.ai/docs/providers to learn more about supported AI providers and models and required environment variables.
+    parameters:
+        - name: --name -n
+          type: string
+          short-summary: Name of the managed cluster.
+        - name: --resource-group -g
+          type: string
+          short-summary: Name of the resource group.
+        - name: --model
+          type: string
+          short-summary: Model to use for the LLM.
+        - name: --api-key
+          type: string
+          short-summary: API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY).
+        - name: --config-file
+          type: string
+          short-summary: Path to configuration file.
+        - name: --max-steps
+          type: int
+          short-summary: Maximum number of steps the LLM can take to investigate the issue.
+        - name: --no-interactive
+          type: bool
+          short-summary: Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.
+        - name: --no-echo-request
+          type: bool
+          short-summary: Disable echoing back the question provided to AKS Agent in the output.
+        - name: --show-tool-output
+          type: bool
+          short-summary: Show the output of each tool that was called during the analysis.
+        - name: --refresh-toolsets
+          type: bool
+          short-summary: Refresh the toolsets status.
+
+    examples:
+        - name: Ask about pod issues in the cluster with Azure OpenAI
+          text: |-
+            export AZURE_API_BASE="https://my-azureopenai-service.openai.azure.com/"
+            export AZURE_API_VERSION="2025-01-01-preview"
+            export AZURE_API_KEY="sk-xxx"
+            az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment
+        - name: Ask about pod issues in the cluster with OpenAI
+          text: |-
+            export OPENAI_API_KEY="sk-xxx"
+            az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
+        - name: Run in interactive mode with an initial question and an explicit API key
+          text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
+        - name: Run in non-interactive batch mode
+          text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
+        - name: Show detailed tool output during analysis
+          text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
+        - name: Use custom configuration file
+          text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/my-gpt4.1-deployment
+        - name: Run agent with no echo of the original question
+          text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
+        - name: Refresh toolsets to get the latest available tools
+          text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deployment
+        - name: Run agent with config file
+          text: |
+            az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml
+            Here is an example of config file:
+            ```yaml
+            model: "gpt-4o"
+            api_key: "..."
+            # define a list of mcp servers, mcp server can be defined
+            mcp_servers:
+              aks_mcp:
+                description: "The AKS-MCP is a Model Context Protocol (MCP) server that enables AI assistants to interact with Azure Kubernetes Service (AKS) clusters"
+                url: "http://localhost:8003/sse"
+
+            # try adding your own tools or toggle the built-in toolsets here
+            # e.g. query company-specific data, fetch logs from your existing observability tools, etc
+            # To check how to add a customized toolset, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/custom_toolsets.html#custom-toolsets
+            # To find all built-in toolsets, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/builtin_toolsets.html
+            toolsets:
+              # add a new json processor toolset
+              json_processor:
+                description: "A toolset for processing JSON data using jq"
+                prerequisites:
+                  - command: "jq --version"  # Ensure jq is installed
+                tools:
+                  - name: "process_json"
+                    description: "A tool that uses jq to process JSON input"
+                    command: "echo '{{ json_input }}' | jq '.'"  # Example jq command to format JSON
+              # disable a built-in toolsets
+              aks/core:
+                enabled: false
+            ```
+"""
diff --git a/src/aks-agent/azext_aks_agent/_params.py b/src/aks-agent/azext_aks_agent/_params.py
new file mode 100644
index 00000000000..89c2abc7f6f
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/_params.py
@@ -0,0 +1,79 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+# pylint: disable=too-many-statements,too-many-lines
+import os.path
+
+from azure.cli.core.api import get_config_dir
+
+from azext_aks_agent._consts import CONST_AGENT_CONFIG_FILE_NAME
+
+from azext_aks_agent._validators import validate_agent_config_file
+
+
+def load_arguments(self, _):
+    """Register the argument definitions of the `aks agent` command on the loader."""
+    with self.argument_context("aks agent") as c:
+        c.positional("prompt", help="Ask any question and answer using available tools.")
+        c.argument(
+            "resource_group_name",
+            options_list=["--resource-group", "-g"],
+            help="Name of resource group.",
+            required=False,
+        )
+        c.argument(
+            "name",
+            options_list=["--name", "-n"],
+            help="Name of the managed cluster.",
+            required=False,
+        )
+        c.argument(
+            "max_steps",
+            type=int,
+            default=10,
+            required=False,
+            help="Maximum number of steps the LLM can take to investigate the issue.",
+        )
+        c.argument(
+            "config_file",
+            default=os.path.join(get_config_dir(), CONST_AGENT_CONFIG_FILE_NAME),
+            validator=validate_agent_config_file,
+            required=False,
+            help="Path to the config file.",
+        )
+        c.argument(
+            "model",
+            help="The model to use for the LLM.",
+            required=False,
+            type=str,
+        )
+        # The first argument is the destination name: it must match the `api_key` parameter of
+        # the command function so that the settings below are applied to --api-key.
+        c.argument(
+            "api_key",
+            help="API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY)",
+            required=False,
+            type=str,
+        )
+        c.argument(
+            "no_interactive",
+            help="Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.",
+            action="store_true",
+        )
+        c.argument(
+            "no_echo_request",
+            help="Disable echoing back the question provided to AKS Agent in the output.",
+            action="store_true",
+        )
+        c.argument(
+            "show_tool_output",
+            help="Show the output of each tool that was called.",
+            action="store_true",
+        )
+        c.argument(
+            "refresh_toolsets",
+            help="Refresh the toolsets status.",
+            action="store_true",
+        )
diff --git a/src/aks-agent/azext_aks_agent/_validators.py b/src/aks-agent/azext_aks_agent/_validators.py
new file mode 100644
index 00000000000..5644e2790d7
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/_validators.py
@@ -0,0 +1,53 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+from __future__ import unicode_literals
+
+import os
+import os.path
+
+import yaml
+from azext_aks_agent._consts import CONST_AGENT_CONFIG_FILE_NAME
+from azure.cli.core.api import get_config_dir
+from azure.cli.core.azclierror import InvalidArgumentValueError
+from knack.log import get_logger
+
+logger = get_logger(__name__)
+
+
+def _validate_param_yaml_file(yaml_path, param_name):
+    """Raise InvalidArgumentValueError unless ``yaml_path`` is a readable, valid YAML file.
+
+    An empty ``yaml_path`` is accepted; ``param_name`` only appears in the error messages."""
+    if not yaml_path:
+        return
+    if not os.path.exists(yaml_path):
+        raise InvalidArgumentValueError(f"--{param_name}={yaml_path}: file is not found.")
+    if not os.access(yaml_path, os.R_OK):
+        raise InvalidArgumentValueError(f"--{param_name}={yaml_path}: file is not readable.")
+    try:
+        # Decode explicitly as UTF-8 instead of relying on the platform default encoding.
+        with open(yaml_path, "r", encoding="utf-8") as file:
+            yaml.safe_load(file)
+    except yaml.YAMLError as e:
+        raise InvalidArgumentValueError(
+            f"--{param_name}={yaml_path}: file is not a valid YAML file: {e}"
+        ) from e
+    except Exception as e:
+        raise InvalidArgumentValueError(
+            f"--{param_name}={yaml_path}: An error occurred while reading the config file: {e}"
+        ) from e
+
+
+def validate_agent_config_file(namespace):
+    """Validate --config-file; a missing file at the default location is not an error."""
+    if not namespace.config_file:
+        return
+    # Expand "~" first, as the agent does before loading the file, so both see the same path.
+    config_file = os.path.expanduser(namespace.config_file)
+    default_config_path = os.path.join(get_config_dir(), CONST_AGENT_CONFIG_FILE_NAME)
+    if config_file == default_config_path and not os.path.exists(config_file):
+        return
+    _validate_param_yaml_file(config_file, "config-file")
diff --git a/src/aks-agent/azext_aks_agent/agent/__init__.py b/src/aks-agent/azext_aks_agent/agent/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/aks-agent/azext_aks_agent/agent/agent.py b/src/aks-agent/azext_aks_agent/agent/agent.py
new file mode 100644
index 00000000000..d456a93458a
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/agent/agent.py
@@ -0,0 +1,210 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+import logging
+import os
+import socket
+import sys
+import uuid
+from pathlib import Path
+
+from azext_aks_agent._consts import (
+ CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY,
+ CONST_AGENT_NAME,
+ CONST_AGENT_NAME_ENV_KEY,
+)
+from azure.cli.core.api import get_config_dir
+from azure.cli.core.commands.client_factory import get_subscription_id
+from knack.util import CLIError
+
+from .prompt import AKS_CONTEXT_PROMPT
+from .telemetry import CLITelemetryClient
+
+
+# NOTE(mainred): holmes uses the RichHandler log handler to provide colorful, readable and
+# well-formatted logs, which makes the interactive mode more user-friendly. Existing log
+# handlers are removed to avoid duplicate logs, and the telemetry and data loggers are
+# quietened so that the console output stays consistent and free of redundant lines.
+def init_log():
+    """Quieten noisy loggers, then initialize holmes console logging and return its result."""
+    # NOTE(mainred): the LiteLLM level has to be raised before the LiteLLM library is loaded,
+    # otherwise the logs it emits while loading would still be printed.
+    quiet_loggers = (
+        "LiteLLM",
+        "telemetry.main", "telemetry.process", "telemetry.save", "telemetry.client",
+        "az_command_data_logger",
+    )
+    for logger_name in quiet_loggers:
+        logging.getLogger(logger_name).setLevel(logging.WARNING)
+    from holmes.utils.console.logging import init_logging
+    # TODO: make log verbose configurable, currently disabled by [].
+    return init_logging([])
+
+
+# pylint: disable=too-many-locals
+def aks_agent(
+    cmd,
+    resource_group_name,
+    name,
+    prompt,
+    model,
+    api_key,
+    max_steps,
+    config_file,
+    no_interactive,
+    no_echo_request,
+    show_tool_output,
+    refresh_toolsets,
+):
+    """
+    Interact with the AKS agent using a prompt or piped input.
+
+    :param cmd: Command object whose ``cli_ctx`` is used to look up the subscription ID.
+    :param resource_group_name: Resource group name rendered into the AKS context prompt.
+    :type resource_group_name: str
+    :param name: Managed cluster name rendered into the AKS context prompt.
+    :type name: str
+    :param prompt: The prompt to send to the agent.
+    :type prompt: str
+    :param model: The model to use for the LLM.
+    :type model: str
+    :param api_key: API key to use for the LLM.
+    :type api_key: str
+    :param max_steps: Maximum number of steps to take.
+    :type max_steps: int
+    :param config_file: Path to the config file.
+    :type config_file: str
+    :param no_interactive: Disable interactive mode.
+    :type no_interactive: bool
+    :param no_echo_request: Disable echoing back the question provided to AKS Agent in the output.
+    :type no_echo_request: bool
+    :param show_tool_output: Whether to show tool output.
+    :type show_tool_output: bool
+    :param refresh_toolsets: Refresh the toolsets status.
+    :type refresh_toolsets: bool
+    :raises CLIError: on Python < 3.10, or when non-interactive with neither a prompt nor piped input.
+    """
+    with CLITelemetryClient():
+        if sys.version_info < (3, 10):
+            raise CLIError("Please upgrade the python version to 3.10 or above to use aks agent.")
+
+        # The CLI flags are negated ("--no-..."); convert them to positive booleans.
+        interactive = not no_interactive
+        echo = not no_echo_request
+
+        console = init_log()
+
+        os.environ[CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY] = get_config_dir()
+        # Holmes library allows the user to specify the agent name through environment variable
+        # before loading the library.
+        os.environ[CONST_AGENT_NAME_ENV_KEY] = CONST_AGENT_NAME
+
+        from holmes.config import Config
+        from holmes.core.prompt import build_initial_ask_messages
+        from holmes.interactive import run_interactive_loop
+        from holmes.plugins.destinations import DestinationType
+        from holmes.plugins.interfaces import Issue
+        from holmes.plugins.prompts import load_and_render_prompt
+        from holmes.utils.console.result import handle_result
+
+        # Detect and read piped input
+        piped_data = None
+        if not sys.stdin.isatty():
+            piped_data = sys.stdin.read().strip()
+            if interactive:
+                console.print(
+                    "[bold yellow]Interactive mode disabled when reading piped input[/bold yellow]"
+                )
+                interactive = False
+
+        expanded_config_file = Path(os.path.expanduser(config_file))
+
+        config = Config.load_from_file(
+            expanded_config_file,
+            model=model,
+            api_key=api_key,
+            max_steps=max_steps,
+        )
+
+        ai = config.create_console_toolcalling_llm(
+            dal=None,
+            refresh_toolsets=refresh_toolsets,
+        )
+        console.print(
+            "[bold yellow]This tool uses AI to generate responses and may not always be accurate.[/bold yellow]"
+        )
+
+        if not prompt and not interactive and not piped_data:
+            raise CLIError(
+                "The 'prompt' argument or piped input is required when using --no-interactive."
+            )
+
+        # Handle piped data
+        if piped_data:
+            if prompt:
+                # User provided both piped data and a prompt
+                prompt = f"Here's some piped output:\n\n{piped_data}\n\n{prompt}"
+            else:
+                # Only piped data, no prompt - ask what to do with it
+                prompt = f"Here's some piped output:\n\n{piped_data}\n\nWhat can you tell me about this output?"
+
+        if echo and not interactive and prompt:
+            console.print("[bold yellow]User:[/bold yellow] " + prompt)
+
+        subscription_id = get_subscription_id(cmd.cli_ctx)
+
+        aks_template_context = {
+            "cluster_name": name,
+            "resource_group": resource_group_name,
+            "subscription_id": subscription_id,
+        }
+
+        aks_context_prompt = load_and_render_prompt(AKS_CONTEXT_PROMPT, aks_template_context)
+
+        # Variables not exposed to the user.
+        # Adds a prompt for post processing.
+        post_processing_prompt = None
+        # File to append to prompt
+        include_file = None
+        if interactive:
+            run_interactive_loop(
+                ai,
+                console,
+                prompt,
+                include_file,
+                post_processing_prompt,
+                show_tool_output=show_tool_output,
+                system_prompt_additions=aks_context_prompt,
+            )
+            return
+
+        messages = build_initial_ask_messages(
+            console,
+            prompt,
+            include_file,
+            ai.tool_executor,
+            config.get_runbook_catalog(),
+            system_prompt_additions=aks_context_prompt,
+        )
+
+        response = ai.call(messages)
+        messages = response.messages
+
+        issue = Issue(
+            id=str(uuid.uuid4()),
+            name=prompt,
+            source_type="holmes-ask",
+            raw={"prompt": prompt, "full_conversation": messages},
+            source_instance_id=socket.gethostname(),
+        )
+        handle_result(
+            response,
+            console,
+            DestinationType.CLI,
+            config,
+            issue,
+            show_tool_output,
+            False,
+        )
diff --git a/src/aks-agent/azext_aks_agent/agent/prompt.py b/src/aks-agent/azext_aks_agent/agent/prompt.py
new file mode 100644
index 00000000000..7856c8d6817
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/agent/prompt.py
@@ -0,0 +1,87 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+AKS_CONTEXT_PROMPT = """
+# AKS-Specific Context and Workflow
+
+You are now operating in Azure Kubernetes Service (AKS) mode. All investigations must consider both Azure control plane and Kubernetes data plane components.
+
+## AKS Context Requirements
+
+### MANDATORY: Establish AKS Cluster Context
+Before any troubleshooting, you MUST establish and validate the AKS cluster context:
+
+{% if cluster_name and resource_group %}
+**User-provided context:**
+- Cluster: `{{cluster_name}}`
+- Resource Group: `{{resource_group}}`
+- Subscription: `{{subscription_id}}`
+
+⚠️ **MANDATORY Validation** - You MUST perform ALL Context Validation Steps below before proceeding with any investigation. Do not skip validation even when context is provided by the user.
+{% else %}
+**Auto-discovery required** - Detect AKS context using this priority:
+
+1. **Primary method**: Check if `aks/core` toolset is available in your toolsets
+ - If available, use the `aks/core` tools to get cluster context directly
+ - This is the preferred method as it provides the most reliable context discovery
+2. **Fallback method**: If `aks/core` toolset is not available:
+ - Get current Azure subscription ID
+ - Extract AKS cluster name from current kubeconfig context
+ - Find resource group by listing AKS clusters with matching name in the subscription
+
+**Critical**: You MUST first check toolset availability before choosing the discovery method.
+
+**Error handling:** If discovery fails (empty response, errors, or toolset unavailable), you MUST:
+1. **IMMEDIATELY STOP ALL OPERATIONS** - Do not proceed with any investigation
+2. **DO NOT ATTEMPT ANY TROUBLESHOOTING** - No kubectl commands, no Azure commands, nothing
+3. **DO NOT INFER THE RESOURCE NAME** - Do not assume any resource name, resource group, or subscription ID
+4. **ONLY display the context failure message** exactly as follows with no extra blank lines (replace the first three placeholders with actual detected values or None):
+ - list "Cluster name", "Resource group", "Subscription ID" with detected value or None
+ - prompt the user to either provide the cluster context in the prompt, including "Cluster name", "Resource group" and "Subscription ID", or
+ - restart the command specifying the cluster info in flags with examples (e.g., --name <cluster-name> --resource-group <resource-group> --subscription <subscription-id>)
+
+{% endif %}
+
+### Context Validation Steps - MANDATORY FOR ALL SCENARIOS
+**These steps MUST be performed whether context is user-provided or auto-discovered:**
+
+1. **Verify cluster exists** in specified resource group/subscription:
+ - Confirm the AKS cluster can be found under the resource group and subscription
+ - If cluster is not found, STOP and report the validation failure
+2. **Check kubeconfig context** - ensure the current kubectl context matches the target AKS cluster:
+ - **MANDATORY**: This step MUST be performed even if you're only checking Azure resources
+ - Get current kubectl context: `kubectl config current-context`
+ - **ONLY if context doesn't match the target AKS cluster name**:
+ a. **Attempt to download credentials**: Use `az aks get-credentials` to download cluster credentials
+ b. **If credential download fails or no tool is available**, you MUST instruct the user to manually download credentials:
+ ```
+ Please manually download AKS credentials:
+ az aks get-credentials --resource-group {{resource_group}} --name {{cluster_name}} --subscription {{subscription_id}}
+ ```
+ c. **Attempt to switch the kubernetes context**: Use `kubectl config use-context` command (NEVER use `run_bash_command` tool to switch context)
+ d. **If context switch fails or no tool is available**, you MUST instruct the user to manually switch context:
+ ```
+ Please manually switch to the correct kubectl context:
+ kubectl config use-context {{cluster_name}}
+ ```
+ - **Verify the current context is now set to the cluster name**: Run `kubectl config current-context` and confirm it matches the target AKS cluster name
+ - **If context already matches**: Skip credential download and proceed
+ - **This ensures the kubectl context is actively switched to the target cluster for any future Kubernetes operations in the session**
+
+**CRITICAL**: Before performing ANY Kubernetes operations (kubectl commands, checking pods, services, deployments, etc.), you MUST ALWAYS verify that the current kubectl context matches the target AKS cluster name. If it doesn't match, you MUST download the correct credentials and switch context before proceeding. This validation is required EVERY TIME you need to interact with Kubernetes resources, even if you've already validated Azure resources in the same session.
+
+**Only proceed with investigation after ALL validation steps pass successfully.**
+
+### AKS Investigation Approach
+- **Start with cluster health** (nodes, system pods, control plane)
+- **Check Azure-specific components** (load balancers, NSGs, managed identity)
+- **Check Kubernetes-specific components** (deployments, services, ingress, namespaces, RBAC)
+- **Analyze both Azure and Kubernetes logs**
+- **Use AKS-aware tools** from available toolsets
+- **Consider AKS limitations and best practices**
+
+**Note**: "Cluster" in this context refers to both the Azure-managed AKS cluster AND the Kubernetes resources running within it. Both layers must be validated before proceeding.
+
+"""  # Template text: the agent renders it with cluster_name, resource_group and subscription_id.
diff --git a/src/aks-agent/azext_aks_agent/agent/telemetry.py b/src/aks-agent/azext_aks_agent/agent/telemetry.py
new file mode 100644
index 00000000000..581eca87ca1
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/agent/telemetry.py
@@ -0,0 +1,77 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+import datetime
+import logging
+import os
+import platform
+
+from applicationinsights import TelemetryClient
+from azure.cli.core.telemetry import (_get_azure_subscription_id,
+ _get_hash_mac_address, _get_user_agent)
+
+DEFAULT_INSTRUMENTATION_KEY = "c301e561-daea-42d9-b9d1-65fca4166704"
+APPLICATIONINSIGHTS_INSTRUMENTATION_KEY_ENV = "APPLICATIONINSIGHTS_INSTRUMENTATION_KEY"
+
+
+class CLITelemetryClient:
+    """Context manager that sends one "AgentCLIStartup" trace through TelemetryClient on exit."""
+
+    def __init__(self):
+        instrumentation_key = self._get_application_insights_instrumentation_key()
+        self._telemetry_client = TelemetryClient(instrumentation_key=instrumentation_key)
+        self.start_time = self._utcnow()
+        self.end_time = ""
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        # Returns None, so an exception raised inside the `with` body keeps propagating.
+        self.end_time = self._utcnow()
+        self.track_agent_started()
+        self.flush()
+
+    @staticmethod
+    def _utcnow():
+        """Return the current UTC time as a naive datetime, without the deprecated utcnow()."""
+        return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
+
+    def track(self, event_name, properties=None):
+        """Send ``event_name`` as an INFO trace with ``properties`` plus the common payload."""
+        # Merge into a fresh dict so the caller's ``properties`` is never mutated.
+        merged = {**(properties or {}), **self._generate_payload()}
+        self._telemetry_client.track_trace(event_name, merged, logging.INFO)
+
+    def track_agent_started(self):
+        """Track the "AgentCLIStartup" event with the recorded start and end times."""
+        timestamps = {"time.start": str(self.start_time), "time.end": str(self.end_time)}
+        self.track("AgentCLIStartup", properties=timestamps)
+
+    def flush(self):
+        self._telemetry_client.flush()
+
+    def _generate_payload(self):
+        """Build the properties attached to every trace (device, OS, user agent, extension)."""
+        extension_name = "aks-agent"
+        try:
+            from azure.cli.core.extension import get_extension
+            extension_name = f"aks-agent@{get_extension('aks-agent').version}"
+        except Exception:  # pylint: disable=broad-except
+            pass  # best effort: keep the bare extension name, but let Ctrl-C/SystemExit through
+
+        return {
+            "device.id": _get_hash_mac_address(),
+            "service.name": "aks agent",
+            "userAzureSubscriptionId": _get_azure_subscription_id(),
+            "OS.Type": platform.system().lower(),  # eg. darwin, windows
+            "OS.Version": platform.version().lower(),  # eg. 10.0.14942
+            "OS.Platform": platform.platform().lower(),  # eg. windows-10-10.0.19041-sp0
+            "userAgent": _get_user_agent(),
+            "extensionname": extension_name,  # extension and version
+        }
+
+    def _get_application_insights_instrumentation_key(self) -> str:
+        return os.getenv(APPLICATIONINSIGHTS_INSTRUMENTATION_KEY_ENV, DEFAULT_INSTRUMENTATION_KEY)
diff --git a/src/aks-agent/azext_aks_agent/azext_metadata.json b/src/aks-agent/azext_aks_agent/azext_metadata.json
new file mode 100644
index 00000000000..1599d697a47
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/azext_metadata.json
@@ -0,0 +1,5 @@
+{
+ "azext.minCliCoreVersion": "2.76.0",
+ "azext.isPreview": true,
+ "name": "aks-agent"
+}
\ No newline at end of file
diff --git a/src/aks-agent/azext_aks_agent/commands.py b/src/aks-agent/azext_aks_agent/commands.py
new file mode 100644
index 00000000000..726dbb56590
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/commands.py
@@ -0,0 +1,16 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+from knack.log import get_logger
+
+logger = get_logger(__name__)
+
+
+# pylint: disable=too-many-statements
+ def load_command_table(self, _):
+     """Register the commands provided by this extension.
+
+     Adds the custom command ``agent`` (implemented by ``aks_agent``) to the
+     ``aks`` command group.
+     """
+     with self.command_group("aks") as aks_group:
+         aks_group.custom_command("agent", "aks_agent")
diff --git a/src/aks-agent/azext_aks_agent/custom.py b/src/aks-agent/azext_aks_agent/custom.py
new file mode 100644
index 00000000000..20cc1f68625
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/custom.py
@@ -0,0 +1,44 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+# pylint: disable=too-many-lines, disable=broad-except
+from azext_aks_agent.agent.agent import aks_agent as aks_agent_internal
+
+from knack.log import get_logger
+
+
+logger = get_logger(__name__)
+
+
+# pylint: disable=unused-argument
+ def aks_agent(
+     cmd,
+     prompt,
+     model,
+     max_steps,
+     config_file,
+     resource_group_name=None,
+     name=None,
+     api_key=None,
+     no_interactive=False,
+     no_echo_request=False,
+     show_tool_output=False,
+     refresh_toolsets=False,
+ ):
+     """Command handler for ``aks agent``; forwards every argument to the agent implementation.
+
+     The arguments are passed through unchanged to
+     ``azext_aks_agent.agent.agent.aks_agent``. Nothing is returned.
+     """
+     # Forward by keyword so the mapping does not depend on the callee's
+     # positional parameter order.
+     aks_agent_internal(
+         cmd=cmd,
+         resource_group_name=resource_group_name,
+         name=name,
+         prompt=prompt,
+         model=model,
+         api_key=api_key,
+         max_steps=max_steps,
+         config_file=config_file,
+         no_interactive=no_interactive,
+         no_echo_request=no_echo_request,
+         show_tool_output=show_tool_output,
+         refresh_toolsets=refresh_toolsets,
+     )
diff --git a/src/aks-agent/azext_aks_agent/tests/__init__.py b/src/aks-agent/azext_aks_agent/tests/__init__.py
new file mode 100644
index 00000000000..34913fb394d
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/tests/__init__.py
@@ -0,0 +1,4 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
diff --git a/src/aks-agent/azext_aks_agent/tests/latest/__init__.py b/src/aks-agent/azext_aks_agent/tests/latest/__init__.py
new file mode 100644
index 00000000000..34913fb394d
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/tests/latest/__init__.py
@@ -0,0 +1,4 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
diff --git a/src/aks-agent/azext_aks_agent/tests/latest/test_agent.py b/src/aks-agent/azext_aks_agent/tests/latest/test_agent.py
new file mode 100644
index 00000000000..eb0082b840e
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/tests/latest/test_agent.py
@@ -0,0 +1,204 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+import logging
+import os
+import sys
+import unittest
+from types import SimpleNamespace
+from unittest.mock import MagicMock, Mock, call, patch
+
+from azext_aks_agent._consts import (CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY,
+ CONST_AGENT_NAME,
+ CONST_AGENT_NAME_ENV_KEY)
+from azext_aks_agent.agent.agent import aks_agent, init_log
+from azure.cli.core.util import CLIError
+
+ # Register stand-in modules for the holmes package tree so that importing any
+ # of these names resolves to a Mock instead of the real dependency.
+ # NOTE(review): the azext_aks_agent imports above run before these stubs are
+ # installed; presumably holmes is only imported lazily there -- confirm.
+ for _holmes_module in (
+     'holmes',
+     'holmes.config',
+     'holmes.core',
+     'holmes.core.prompt',
+     'holmes.interactive',
+     'holmes.plugins',
+     'holmes.plugins.destinations',
+     'holmes.plugins.interfaces',
+     'holmes.plugins.prompts',
+     'holmes.utils',
+     'holmes.utils.console',
+     'holmes.utils.console.logging',
+     'holmes.utils.console.result',
+ ):
+     sys.modules[_holmes_module] = Mock()
+
+
+ def setUpModule():
+     """Skip every test in this module on interpreters older than Python 3.10."""
+     supported = sys.version_info >= (3, 10)
+     if not supported:
+         raise unittest.SkipTest("Tests in this module require Python >= 3.10")
+
+
+ class TestInitLog(unittest.TestCase):
+     """Tests for ``init_log``."""
+
+     @patch('azext_aks_agent.agent.agent.logging.getLogger')
+     def test_init_log_logger_level_setting(self, mock_get_logger):
+         """``init_log`` makes six ``setLevel`` calls, each with WARNING."""
+         # Every getLogger() lookup returns the same mock, so all setLevel
+         # calls accumulate on a single object.
+         shared_logger = Mock()
+         mock_get_logger.return_value = shared_logger
+
+         with patch('holmes.utils.console.logging.init_logging') as fake_init_logging:
+             fake_init_logging.return_value = Mock()
+             init_log()
+
+         set_level_calls = shared_logger.setLevel.call_args_list
+         self.assertEqual(len(set_level_calls), 6)
+         for recorded in set_level_calls:
+             self.assertEqual(recorded[0][0], logging.WARNING)
+
+
+ class TestAksAgent(unittest.TestCase):
+ """Test cases for aks_agent function"""
+
+ def setUp(self):
+ """Set up test fixtures"""
+ self.mock_cmd = Mock()
+ self.mock_cmd.cli_ctx = Mock()
+ # Fix the cli_ctx.data structure to be subscriptable
+ self.mock_cmd.cli_ctx.data = {'subscription_id': 'test-subscription-id'}
+
+ # Default parameters for aks_agent function
+ # Tests either pass this dict as-is or copy it and override individual keys.
+ self.default_params = {
+ 'cmd': self.mock_cmd,
+ 'resource_group_name': 'test-rg',
+ 'name': 'test-cluster',
+ 'prompt': 'test prompt',
+ 'model': 'test-model',
+ 'api_key': 'test-key',
+ 'max_steps': 10,
+ 'config_file': '/path/to/config.yaml',
+ 'no_interactive': False,
+ 'no_echo_request': False,
+ 'show_tool_output': True,
+ 'refresh_toolsets': False,
+ }
+
+ def test_aks_agent_python_version_check(self):
+ """Test that aks_agent raises error for Python version < 3.10"""
+ # Simulate an unsupported interpreter by replacing sys.version_info with a 3.9 tuple.
+ with patch.object(sys, 'version_info', (3, 9, 0)):
+ with patch('azext_aks_agent.agent.agent.CLITelemetryClient'):
+ with self.assertRaises(CLIError) as cm:
+ aks_agent(**self.default_params)
+
+ self.assertIn("Please upgrade the python version to 3.10", str(cm.exception))
+
+ # Decorators are applied bottom-up, so the mock arguments arrive in the
+ # reverse order of the @patch lines (os.path.expanduser first).
+ @patch('sys.stdin.isatty')
+ @patch('azext_aks_agent.agent.agent.CLITelemetryClient')
+ @patch('azext_aks_agent.agent.agent.init_log')
+ @patch('azure.cli.core.api.get_config_dir')
+ @patch('azure.cli.core.commands.client_factory.get_subscription_id')
+ @patch('os.path.expanduser')
+ def test_aks_agent_no_prompt_no_interactive_raises_error(self, mock_expanduser, mock_get_subscription_id,
+ mock_get_config_dir, mock_init_log,
+ mock_cli_telemetry, mock_stdin_isatty):
+ """Test that aks_agent raises error when no prompt and not interactive mode"""
+ # Arrange
+ mock_stdin_isatty.return_value = True # No piped input
+ mock_console = Mock()
+ mock_init_log.return_value = mock_console
+ mock_get_config_dir.return_value = "/home/user/.azure"
+ mock_get_subscription_id.return_value = "test-subscription"
+
+ # Mock os.path.expanduser to return a simple path string
+ mock_expanduser.return_value = "/expanded/path/to/config.yaml"
+
+ # Run with an emptied os.environ so no host variables leak into the test.
+ with patch.dict(os.environ, {}, clear=True):
+ with patch('holmes.config.Config') as mock_config_class:
+ mock_config = Mock()
+ mock_config_class.load_from_file.return_value = mock_config
+ mock_ai = Mock()
+ mock_config.create_console_toolcalling_llm.return_value = mock_ai
+
+ # Act & Assert
+ params = self.default_params.copy()
+ params['prompt'] = None
+ params['no_interactive'] = True # Not interactive
+
+ with self.assertRaises(CLIError) as cm:
+ aks_agent(**params)
+
+ self.assertIn("Either the 'prompt' argument must be provided", str(cm.exception))
+
+ # Same patch stack as the previous test; mock arguments again arrive bottom-up.
+ @patch('sys.stdin.isatty')
+ @patch('azext_aks_agent.agent.agent.CLITelemetryClient')
+ @patch('azext_aks_agent.agent.agent.init_log')
+ @patch('azure.cli.core.api.get_config_dir')
+ @patch('azure.cli.core.commands.client_factory.get_subscription_id')
+ @patch('os.path.expanduser')
+ def test_aks_agent_echo_request_enabled(self, mock_expanduser, mock_get_subscription_id, mock_get_config_dir,
+ mock_init_log, mock_cli_telemetry, mock_stdin_isatty):
+ """Test aks_agent echoes request when echo is enabled"""
+ # Arrange
+ mock_stdin_isatty.return_value = True
+ # init_log is patched to return this console, so the echo can be asserted on it.
+ mock_console = Mock()
+ mock_init_log.return_value = mock_console
+ mock_get_config_dir.return_value = "/home/user/.azure"
+ mock_get_subscription_id.return_value = "test-subscription"
+
+ # Mock os.path.expanduser to return a simple path string
+ mock_expanduser.return_value = "/expanded/path/to/config.yaml"
+
+ # Run with an emptied os.environ so no host variables leak into the test.
+ with patch.dict(os.environ, {}, clear=True):
+ with patch('holmes.config.Config') as mock_config_class:
+ mock_config = Mock()
+ mock_config_class.load_from_file.return_value = mock_config
+ mock_ai = Mock()
+ mock_config.create_console_toolcalling_llm.return_value = mock_ai
+ mock_config.get_runbook_catalog.return_value = {}
+
+ with patch('holmes.core.prompt.build_initial_ask_messages') as mock_build_messages:
+ mock_messages = [{'role': 'user', 'content': 'test'}]
+ mock_build_messages.return_value = mock_messages
+
+ # The mocked LLM call returns a response carrying the same message list.
+ mock_response = Mock()
+ mock_response.messages = mock_messages
+ mock_ai.call.return_value = mock_response
+
+ with patch('holmes.utils.console.result.handle_result'):
+ with patch('holmes.plugins.prompts.load_and_render_prompt') as mock_load_prompt:
+ with patch('holmes.plugins.interfaces.Issue'):
+ with patch('uuid.uuid4'):
+ with patch('socket.gethostname'):
+ mock_load_prompt.return_value = "AKS context"
+
+ # Act
+ params = self.default_params.copy()
+ params['no_interactive'] = True # Non-interactive
+ params['no_echo_request'] = False # Echo enabled
+ aks_agent(**params)
+
+ # Assert that console.print was called with the user prompt
+ mock_console.print.assert_any_call("[bold yellow]User:[/bold yellow] test prompt")
+
+ @patch('azext_aks_agent.agent.agent.CLITelemetryClient')
+ def test_aks_agent_telemetry_client_usage(self, mock_cli_telemetry):
+ """Test that aks_agent uses CLITelemetryClient context manager"""
+ # Arrange
+ mock_cli_telemetry.return_value.__enter__ = Mock(return_value=Mock())
+ mock_cli_telemetry.return_value.__exit__ = Mock(return_value=None)
+
+ # A 3.9 version tuple makes aks_agent raise CLIError; the assertions below
+ # then check that the context manager was still entered and exited once.
+ with patch.object(sys, 'version_info', (3, 9, 0)):
+ # Act & Assert
+ with self.assertRaises(CLIError):
+ aks_agent(**self.default_params)
+
+ # Verify CLITelemetryClient was used as context manager
+ mock_cli_telemetry.assert_called_once()
+ mock_cli_telemetry.return_value.__enter__.assert_called_once()
+ mock_cli_telemetry.return_value.__exit__.assert_called_once()
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/src/aks-agent/azext_aks_agent/tests/latest/test_validators.py b/src/aks-agent/azext_aks_agent/tests/latest/test_validators.py
new file mode 100644
index 00000000000..7e366d45f63
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/tests/latest/test_validators.py
@@ -0,0 +1,297 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+import os
+import shutil
+import tempfile
+import unittest
+from unittest.mock import patch
+
+import azext_aks_agent._validators as validators
+from azure.cli.core.azclierror import InvalidArgumentValueError
+
+
+ class TestValidateParamYamlFile(unittest.TestCase):
+ """Test cases for validators._validate_param_yaml_file"""
+
+ def setUp(self):
+ # Each test gets its own temporary directory holding the fixture files.
+ self.temp_dir = tempfile.mkdtemp()
+ self.valid_yaml_file = os.path.join(self.temp_dir, "valid.yaml")
+ self.invalid_yaml_file = os.path.join(self.temp_dir, "invalid.yaml")
+ self.readonly_yaml_file = os.path.join(self.temp_dir, "readonly.yaml")
+ # This path is never created, so it always refers to a missing file.
+ self.nonexistent_file = os.path.join(self.temp_dir, "nonexistent.yaml")
+
+ # Create valid YAML file
+ with open(self.valid_yaml_file, 'w') as f:
+ f.write("key1: value1\nkey2:\n - item1\n - item2\n")
+
+ # Create invalid YAML file
+ with open(self.invalid_yaml_file, 'w') as f:
+ f.write("invalid: yaml: content: [\n - unclosed\n")
+
+ # Create readonly YAML file
+ # NOTE(review): despite the name, mode 0o000 removes read permission too;
+ # the file is used by the "not readable" test below.
+ with open(self.readonly_yaml_file, 'w') as f:
+ f.write("key: value\n")
+ os.chmod(self.readonly_yaml_file, 0o000) # Remove all permissions
+
+ def tearDown(self):
+ # Restore permissions before cleanup
+ if os.path.exists(self.readonly_yaml_file):
+ os.chmod(self.readonly_yaml_file, 0o644)
+ shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+ def test_none_yaml_path(self):
+ """Test that None yaml_path returns without error"""
+ validators._validate_param_yaml_file(None, "config-file")
+
+ def test_empty_yaml_path(self):
+ """Test that empty string yaml_path returns without error"""
+ validators._validate_param_yaml_file("", "config-file")
+
+ def test_nonexistent_file(self):
+ """Test that non-existent file raises InvalidArgumentValueError"""
+ with self.assertRaises(InvalidArgumentValueError) as cm:
+ validators._validate_param_yaml_file(self.nonexistent_file, "config-file")
+ self.assertIn("file is not found", str(cm.exception))
+ self.assertIn("config-file", str(cm.exception))
+
+ def test_unreadable_file(self):
+ """Test that unreadable file raises InvalidArgumentValueError"""
+ # NOTE(review): os is already imported at module level; this local import is redundant.
+ import os
+
+ # Skip on Windows as it handles permissions differently
+ if os.name == 'nt':
+ self.skipTest("Skipping readonly test on Windows")
+
+ # NOTE(review): mode 0o000 does not stop a privileged (root) user from reading
+ # the file, so this test presumably fails when the suite runs as root -- confirm.
+ with self.assertRaises(InvalidArgumentValueError) as cm:
+ validators._validate_param_yaml_file(self.readonly_yaml_file, "config-file")
+ self.assertIn("file is not readable", str(cm.exception))
+ self.assertIn("config-file", str(cm.exception))
+
+ def test_invalid_yaml_file(self):
+ """Test that invalid YAML content raises InvalidArgumentValueError"""
+ with self.assertRaises(InvalidArgumentValueError) as cm:
+ validators._validate_param_yaml_file(self.invalid_yaml_file, "config-file")
+ self.assertIn("file is not a valid YAML file", str(cm.exception))
+ self.assertIn("config-file", str(cm.exception))
+
+ def test_valid_yaml_file(self):
+ """Test that valid YAML file passes validation"""
+ # Should not raise any exception
+ validators._validate_param_yaml_file(self.valid_yaml_file, "config-file")
+
+ def test_different_param_names(self):
+ """Test that different parameter names are included in error messages"""
+ with self.assertRaises(InvalidArgumentValueError) as cm:
+ validators._validate_param_yaml_file(self.nonexistent_file, "my-custom-param")
+ self.assertIn("my-custom-param", str(cm.exception))
+
+ @patch('builtins.open')
+ def test_general_exception_handling(self, mock_open):
+ """Test that general exceptions are caught and re-raised as InvalidArgumentValueError"""
+ # Every open() call raises, regardless of the file's real permissions.
+ mock_open.side_effect = PermissionError("Access denied")
+
+ with self.assertRaises(InvalidArgumentValueError) as cm:
+ validators._validate_param_yaml_file(self.valid_yaml_file, "config-file")
+ self.assertIn("An error occurred while reading the config file", str(cm.exception))
+ self.assertIn("config-file", str(cm.exception))
+
+ def test_complex_yaml_file(self):
+ """Test validation with complex YAML structure"""
+ # NOTE(review): os is already imported at module level; this local import is redundant.
+ import os
+ complex_yaml_file = os.path.join(self.temp_dir, "complex.yaml")
+ with open(complex_yaml_file, 'w') as f:
+ f.write("""
+ apiVersion: v1
+ kind: ConfigMap
+ metadata:
+ name: test-config
+ namespace: default
+ data:
+ config.yaml: |
+ server:
+ host: localhost
+ port: 8080
+ features:
+ - auth
+ - logging
+ database:
+ url: "postgresql://user:pass@host:5432/db"
+ pool_size: 10
+ """)
+
+ # Should not raise any exception
+ validators._validate_param_yaml_file(complex_yaml_file, "config-file")
+
+ def test_empty_yaml_file(self):
+ """Test validation with empty YAML file"""
+ # NOTE(review): os is already imported at module level; this local import is redundant.
+ import os
+ empty_yaml_file = os.path.join(self.temp_dir, "empty.yaml")
+ with open(empty_yaml_file, 'w') as f:
+ f.write("")
+
+ # Should not raise any exception - empty file is valid YAML
+ validators._validate_param_yaml_file(empty_yaml_file, "config-file")
+
+
+ class AgentConfigFileNamespace:
+     """Minimal stand-in for a parsed-arguments namespace exposing only ``config_file``."""
+
+     def __init__(self, config_file=None):
+         # Stored verbatim; no normalisation or validation happens here.
+         self.config_file = config_file
+
+
+ class TestValidateAgentConfigFile(unittest.TestCase):
+ """Test cases for validators.validate_agent_config_file"""
+
+ def setUp(self):
+
+ # Each test gets its own temporary directory holding the fixture files.
+ self.temp_dir = tempfile.mkdtemp()
+ self.valid_yaml_file = os.path.join(self.temp_dir, "valid_agent.yaml")
+ self.invalid_yaml_file = os.path.join(self.temp_dir, "invalid_agent.yaml")
+ self.readonly_yaml_file = os.path.join(self.temp_dir, "readonly_agent.yaml")
+ # This path is never created, so it always refers to a missing file.
+ self.nonexistent_file = os.path.join(self.temp_dir, "nonexistent_agent.yaml")
+
+ # Create valid YAML file
+ # NOTE(review): "model=azure/gpt-4.1" parses as a plain YAML scalar, not a
+ # mapping; presumably "model: azure/gpt-4.1" was intended -- confirm.
+ with open(self.valid_yaml_file, 'w') as f:
+ f.write("""
+ model=azure/gpt-4.1
+ """)
+
+ # Create invalid YAML file
+ with open(self.invalid_yaml_file, 'w') as f:
+ f.write("invalid: yaml: content: [\n - unclosed\n")
+
+ # Create readonly YAML file
+ # NOTE(review): despite the name, mode 0o000 removes read permission too;
+ # the file is used by the "not readable" test below.
+ with open(self.readonly_yaml_file, 'w') as f:
+ f.write("agent:\n config: test\n")
+ os.chmod(self.readonly_yaml_file, 0o000) # Remove all permissions
+
+ def tearDown(self):
+ # NOTE(review): os and shutil are already imported at module level; these
+ # local imports are redundant.
+ import os
+ import shutil
+
+ # Restore permissions before cleanup
+ if os.path.exists(self.readonly_yaml_file):
+ os.chmod(self.readonly_yaml_file, 0o644)
+ shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+ def test_none_config_file(self):
+ """Test that None config_file returns without error"""
+ namespace = AgentConfigFileNamespace(None)
+ validators.validate_agent_config_file(namespace)
+
+ def test_empty_config_file(self):
+ """Test that empty string config_file returns without error"""
+ namespace = AgentConfigFileNamespace("")
+ validators.validate_agent_config_file(namespace)
+
+ def test_valid_config_file(self):
+ """Test that valid YAML config file passes validation"""
+ namespace = AgentConfigFileNamespace(self.valid_yaml_file)
+ # Should not raise any exception
+ validators.validate_agent_config_file(namespace)
+
+ def test_invalid_yaml_config_file(self):
+ """Test that invalid YAML config file raises InvalidArgumentValueError"""
+ namespace = AgentConfigFileNamespace(self.invalid_yaml_file)
+ with self.assertRaises(InvalidArgumentValueError) as cm:
+ validators.validate_agent_config_file(namespace)
+ self.assertIn("file is not a valid YAML file", str(cm.exception))
+ self.assertIn("config-file", str(cm.exception))
+
+ def test_nonexistent_config_file(self):
+ """Test that non-existent config file raises InvalidArgumentValueError"""
+ namespace = AgentConfigFileNamespace(self.nonexistent_file)
+ with self.assertRaises(InvalidArgumentValueError) as cm:
+ validators.validate_agent_config_file(namespace)
+ self.assertIn("file is not found", str(cm.exception))
+ self.assertIn("config-file", str(cm.exception))
+
+ def test_unreadable_config_file(self):
+ """Test that unreadable config file raises InvalidArgumentValueError"""
+ # NOTE(review): os is already imported at module level; this local import is redundant.
+ import os
+
+ # Skip on Windows as it handles permissions differently
+ if os.name == 'nt':
+ self.skipTest("Skipping readonly test on Windows")
+
+ # NOTE(review): mode 0o000 does not stop a privileged (root) user from reading
+ # the file, so this test presumably fails when the suite runs as root -- confirm.
+ namespace = AgentConfigFileNamespace(self.readonly_yaml_file)
+ with self.assertRaises(InvalidArgumentValueError) as cm:
+ validators.validate_agent_config_file(namespace)
+ self.assertIn("file is not readable", str(cm.exception))
+ self.assertIn("config-file", str(cm.exception))
+
+ # Decorators are applied bottom-up: mock_exists comes from the os.path.exists
+ # patch, mock_get_config_dir from the get_config_dir patch.
+ @patch('azext_aks_agent._validators.get_config_dir')
+ @patch('azext_aks_agent._validators.os.path.exists')
+ def test_default_config_path_nonexistent(self, mock_exists, mock_get_config_dir):
+ """Test that default config path that doesn't exist returns without error"""
+ mock_get_config_dir.return_value = "/home/user/.azure"
+ mock_exists.return_value = False
+
+ # Path built from the mocked config dir plus the file name aksAgent.yaml.
+ default_path = "/home/user/.azure/aksAgent.yaml"
+ namespace = AgentConfigFileNamespace(default_path)
+
+ # Should not raise any exception when default path doesn't exist
+ validators.validate_agent_config_file(namespace)
+
+ @patch('azext_aks_agent._validators.get_config_dir')
+ def test_default_config_path_exists_valid(self, mock_get_config_dir):
+ """Test that default config path with valid file passes validation"""
+ # Point the config dir at the temp dir so aksAgent.yaml can be created there.
+ mock_get_config_dir.return_value = self.temp_dir
+
+ default_path = os.path.join(self.temp_dir, "aksAgent.yaml")
+ # Create the default config file
+ with open(default_path, 'w') as f:
+ f.write("agent:\n config: default\n")
+
+ namespace = AgentConfigFileNamespace(default_path)
+ # Should not raise any exception
+ validators.validate_agent_config_file(namespace)
+
+ @patch('azext_aks_agent._validators.get_config_dir')
+ def test_default_config_path_exists_invalid(self, mock_get_config_dir):
+ """Test that default config path with invalid file raises error"""
+ # Point the config dir at the temp dir so aksAgent.yaml can be created there.
+ mock_get_config_dir.return_value = self.temp_dir
+
+ default_path = os.path.join(self.temp_dir, "aksAgent.yaml")
+ # Create the default config file with invalid YAML
+ with open(default_path, 'w') as f:
+ f.write("invalid: yaml: [\n unclosed\n")
+
+ namespace = AgentConfigFileNamespace(default_path)
+ with self.assertRaises(InvalidArgumentValueError) as cm:
+ validators.validate_agent_config_file(namespace)
+ self.assertIn("file is not a valid YAML file", str(cm.exception))
+
+ def test_empty_agent_config_file(self):
+ """Test validation with empty agent config file"""
+ # NOTE(review): os is already imported at module level; this local import is redundant.
+ import os
+ empty_config_file = os.path.join(self.temp_dir, "empty_agent.yaml")
+ with open(empty_config_file, 'w') as f:
+ f.write("")
+
+ namespace = AgentConfigFileNamespace(empty_config_file)
+ # Should not raise any exception - empty file is valid YAML
+ validators.validate_agent_config_file(namespace)
+
+ @patch('builtins.open')
+ def test_file_access_exception(self, mock_open):
+ """Test that general file access exceptions are handled properly"""
+ # Every open() call raises, regardless of the file's real permissions.
+ mock_open.side_effect = PermissionError("Access denied")
+
+ namespace = AgentConfigFileNamespace(self.valid_yaml_file)
+ with self.assertRaises(InvalidArgumentValueError) as cm:
+ validators.validate_agent_config_file(namespace)
+ self.assertIn("An error occurred while reading the config file", str(cm.exception))
+ self.assertIn("config-file", str(cm.exception))
+
+ def test_minimal_valid_agent_config(self):
+ """Test validation with minimal valid agent configuration"""
+ # NOTE(review): os is already imported at module level; this local import is redundant.
+ import os
+ minimal_config_file = os.path.join(self.temp_dir, "minimal_agent.yaml")
+ with open(minimal_config_file, 'w') as f:
+ f.write("agent: {}")
+
+ namespace = AgentConfigFileNamespace(minimal_config_file)
+ # Should not raise any exception
+ validators.validate_agent_config_file(namespace)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/src/aks-agent/azext_aks_agent/tests/latest/utils.py b/src/aks-agent/azext_aks_agent/tests/latest/utils.py
new file mode 100644
index 00000000000..69cda720ee8
--- /dev/null
+++ b/src/aks-agent/azext_aks_agent/tests/latest/utils.py
@@ -0,0 +1,11 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+import os
+
+
+ def get_test_data_file_path(filename):
+     """Return the path of ``filename`` inside the ``data`` directory next to this module."""
+     # realpath resolves symlinks, so the result is anchored at the real module location.
+     module_path = os.path.realpath(__file__)
+     data_dir = os.path.join(os.path.dirname(module_path), "data")
+     return os.path.join(data_dir, filename)
diff --git a/src/aks-agent/setup.cfg b/src/aks-agent/setup.cfg
new file mode 100644
index 00000000000..3c6e79cf31d
--- /dev/null
+++ b/src/aks-agent/setup.cfg
@@ -0,0 +1,2 @@
+[bdist_wheel]
+universal=1
diff --git a/src/aks-agent/setup.py b/src/aks-agent/setup.py
new file mode 100644
index 00000000000..50725225ecb
--- /dev/null
+++ b/src/aks-agent/setup.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for license information.
+# --------------------------------------------------------------------------------------------
+
+from codecs import open as open1
+
+from setuptools import find_packages, setup
+
+ VERSION = "1.0.0b1"
+
+ # Only Python 3.10+ is advertised: the holmesgpt dependency below is installed
+ # solely for python_version >= '3.10', so the previously listed 3.6-3.8
+ # classifiers described interpreters on which the agent cannot work.
+ CLASSIFIERS = [
+     "Development Status :: 4 - Beta",
+     "Intended Audience :: Developers",
+     "Intended Audience :: System Administrators",
+     "Programming Language :: Python",
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3.10",
+     "Programming Language :: Python :: 3.11",
+     "Programming Language :: Python :: 3.12",
+     "License :: OSI Approved :: MIT License",
+ ]
+
+ # The environment marker keeps the package installable on older interpreters
+ # while pulling in holmesgpt only where the marker matches.
+ DEPENDENCIES = [
+     "holmesgpt==0.12.6; python_version >= '3.10'",
+ ]
+
+ # The long description is the README followed by the release history.
+ with open1("README.rst", "r", encoding="utf-8") as readme_file:
+     README = readme_file.read()
+ with open1("HISTORY.rst", "r", encoding="utf-8") as history_file:
+     HISTORY = history_file.read()
+
+ LONG_DESCRIPTION = "\n\n".join((README, HISTORY))
+
+ setup(
+     name="aks-agent",
+     version=VERSION,
+     description="Provides an interactive AI-powered debugging tool for AKS",
+     long_description=LONG_DESCRIPTION,
+     license="MIT",
+     author="Microsoft Corporation",
+     author_email="azpycli@microsoft.com",
+     url="https://github.com/Azure/azure-cli-extensions/tree/main/src/aks-agent",
+     classifiers=CLASSIFIERS,
+     # Ship everything except the test package, plus the extension metadata file.
+     packages=find_packages(exclude=["tests"]),
+     package_data={"azext_aks_agent": ["azext_metadata.json"]},
+     install_requires=DEPENDENCIES,
+ )