Azure · yanzhudd · Aug 26, 2025 · Aug 15, 2025 · Aug 15, 2025 · Aug 19, 2025
@@ -12,6 +12,10 @@ To release a new version, please select a new version number (usually plus 1 to
 Pending
 +++++++
 
+18.0.0b28
++++++++
+* Add interactive AI-powered debugging tool `az aks agent`.
+
 18.0.0b27
 +++++++
 * Add framework for interactive AI-powered debugging tool.

@@ -378,4 +378,4 @@
 CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY = "HOLMES_CONFIGPATH_DIR"
 CONST_AGENT_NAME = "AKS AGENT"
 CONST_AGENT_NAME_ENV_KEY = "AGENT_NAME"
-CONST_AGENT_CONFIG_FILE_NAME = "aksAgent.config"
+CONST_AGENT_CONFIG_FILE_NAME = "aksAgent.yaml"
@@ -3953,100 +3953,98 @@
           short-summary: Name of the identity binding to show.
 """
 
-# pylint: disable=line-too-long
-# helps[
-#     "aks agent"
-# ] = """
-#     type: command
-#     short-summary: Run AI assistant to analyze and troubleshoot Kubernetes clusters.
-#     long-summary: |-
-#       This command allows you to ask questions about your Azure Kubernetes cluster and get answers using AI models.
-#       Environment variables must be set to use the AI model, please refer to https://docs.litellm.ai/docs/providers to learn more about supported AI providers and models and required environment variables.
-#     parameters:
-#         - name: --name -n
-#           type: string
-#           short-summary: Name of the managed cluster.
-#         - name: --resource-group -g
-#           type: string
-#           short-summary: Name of the resource group.
-#         - name: --model
-#           type: string
-#           short-summary: Model to use for the LLM.
-#         - name: --api-key
-#           type: string
-#           short-summary: API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY).
-#         - name: --config-file
-#           type: string
-#           short-summary: Path to configuration file.
-#         - name: --max-steps
-#           type: int
-#           short-summary: Maximum number of steps the LLM can take to investigate the issue.
-#         - name: --no-interactive
-#           type: bool
-#           short-summary: Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.
-#         - name: --no-echo-request
-#           type: bool
-#           short-summary: Disable echoing back the question provided to AKS Agent in the output.
-#         - name: --show-tool-output
-#           type: bool
-#           short-summary: Show the output of each tool that was called during the analysis.
-#         - name: --refresh-toolsets
-#           type: bool
-#           short-summary: Refresh the toolsets status.
-#
-#     examples:
-#         - name: Ask about pod issues in the cluster with Azure OpenAI
-#           text: |-
-#             export AZURE_API_BASE="https://my-azureopenai-service.openai.azure.com/"
-#             export AZURE_API_VERSION="2025-01-01-preview"
-#             export AZURE_API_KEY="sk-xxx"
-#             az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment
-#         - name: Ask about pod issues in the cluster with OpenAI
-#           text: |-
-#             export OPENAI_API_KEY="sk-xxx"
-#             az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
-#           text: az aks agent "Why are my pods not starting?"
-#         - name: Run in interactive mode without a question
-#           text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
-#         - name: Run in non-interactive batch mode
-#           text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
-#         - name: Show detailed tool output during analysis
-#           text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
-#         - name: Use custom configuration file
-#           text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.config --model azure/my-gpt4.1-deployment
-#         - name: Run agent with no echo of the original question
-#           text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
-#         - name: Refresh toolsets to get the latest available tools
-#           text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deploymen
-#         - name: Run agent with config file
-#           text: |
-#             az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.config
-#             Here is an example of config file:
-#             ```json
-#             model: "gpt-4o"
-#             api_key: "..."
-#             # define a list of mcp servers, mcp server can be defined
-#             mcp_servers:
-#               aks_mcp:
-#                 description: "The AKS-MCP is a Model Context Protocol (MCP) server that enables AI assistants to interact with Azure Kubernetes Service (AKS) clusters"
-#                 url: "http://localhost:8003/sse"
-#
-#             # try adding your own tools or toggle the built-in toolsets here
-#             # e.g. query company-specific data, fetch logs from your existing observability tools, etc
-#             # To check how to add a customized toolset, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/custom_toolsets.html#custom-toolsets
-#             # To find all built-in toolsets, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/builtin_toolsets.html
-#             toolsets:
-#               # add a new json processor toolset
-#               json_processor:
-#                 description: "A toolset for processing JSON data using jq"
-#                 prerequisites:
-#                   - command: "jq --version"  # Ensure jq is installed
-#                 tools:
-#                   - name: "process_json"
-#                     description: "A tool that uses jq to process JSON input"
-#                     command: "echo '{{ json_input }}' | jq '.'"  # Example jq command to format JSON
-#               # disable a built-in toolsets
-#               aks/core:
-#                 enabled: false
-#               ```
-# """
+helps[
+    "aks agent"
+] = """
+    type: command
+    short-summary: Run AI assistant to analyze and troubleshoot Kubernetes clusters.
+    long-summary: |-
+      This command allows you to ask questions about your Azure Kubernetes cluster and get answers using AI models.
+      Environment variables must be set to use the AI model, please refer to https://docs.litellm.ai/docs/providers to learn more about supported AI providers and models and required environment variables.
+    parameters:
+        - name: --name -n
+          type: string
+          short-summary: Name of the managed cluster.
+        - name: --resource-group -g
+          type: string
+          short-summary: Name of the resource group.
+        - name: --model
+          type: string
+          short-summary: Model to use for the LLM.
+        - name: --api-key
+          type: string
+          short-summary: API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY).
+        - name: --config-file
+          type: string
+          short-summary: Path to configuration file.
+        - name: --max-steps
+          type: int
+          short-summary: Maximum number of steps the LLM can take to investigate the issue.
+        - name: --no-interactive
+          type: bool
+          short-summary: Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.
+        - name: --no-echo-request
+          type: bool
+          short-summary: Disable echoing back the question provided to AKS Agent in the output.
+        - name: --show-tool-output
+          type: bool
+          short-summary: Show the output of each tool that was called during the analysis.
+        - name: --refresh-toolsets
+          type: bool
+          short-summary: Refresh the toolsets status.
+
+    examples:
+        - name: Ask about pod issues in the cluster with Azure OpenAI
+          text: |-
+            export AZURE_API_BASE="https://my-azureopenai-service.openai.azure.com/"
+            export AZURE_API_VERSION="2025-01-01-preview"
+            export AZURE_API_KEY="sk-xxx"
+            az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment
+        - name: Ask about pod issues in the cluster with OpenAI
+          text: |-
+            export OPENAI_API_KEY="sk-xxx"
+            az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
+        - name: Provide the API key on the command line instead of an environment variable
+          text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
+        - name: Run in non-interactive batch mode
+          text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
+        - name: Show detailed tool output during analysis
+          text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
+        - name: Use custom configuration file
+          text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/my-gpt4.1-deployment
+        - name: Run agent with no echo of the original question
+          text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
+        - name: Refresh toolsets to get the latest available tools
+          text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deployment
+        - name: Run agent with config file
+          text: |
+            az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml
+            Here is an example of config file:
+            ```yaml
+            model: "gpt-4o"
+            api_key: "..."
+            # define a list of mcp servers, mcp server can be defined
+            mcp_servers:
+              aks_mcp:
+                description: "The AKS-MCP is a Model Context Protocol (MCP) server that enables AI assistants to interact with Azure Kubernetes Service (AKS) clusters"
+                url: "http://localhost:8003/sse"
+
+            # try adding your own tools or toggle the built-in toolsets here
+            # e.g. query company-specific data, fetch logs from your existing observability tools, etc
+            # To check how to add a customized toolset, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/custom_toolsets.html#custom-toolsets
+            # To find all built-in toolsets, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/builtin_toolsets.html
+            toolsets:
+              # add a new json processor toolset
+              json_processor:
+                description: "A toolset for processing JSON data using jq"
+                prerequisites:
+                  - command: "jq --version"  # Ensure jq is installed
+                tools:
+                  - name: "process_json"
+                    description: "A tool that uses jq to process JSON input"
+                    command: "echo '{{ json_input }}' | jq '.'"  # Example jq command to format JSON
+              # disable a built-in toolsets
+              aks/core:
+                enabled: false
+              ```
+"""
@@ -23,7 +23,7 @@
     validate_nat_gateway_idle_timeout,
     validate_nat_gateway_managed_outbound_ip_count,
 )
-# from azure.cli.core.api import get_config_dir
+from azure.cli.core.api import get_config_dir
 from azure.cli.core.commands.parameters import (
     edge_zone_type,
     file_type,
@@ -150,7 +150,8 @@
     CONST_ADVANCED_NETWORKPOLICIES_FQDN,
     CONST_ADVANCED_NETWORKPOLICIES_L7,
     CONST_TRANSIT_ENCRYPTION_TYPE_NONE,
-    CONST_TRANSIT_ENCRYPTION_TYPE_WIREGUARD
+    CONST_TRANSIT_ENCRYPTION_TYPE_WIREGUARD,
+    CONST_AGENT_CONFIG_FILE_NAME,
 )
 
 from azext_aks_preview._validators import (
@@ -224,7 +225,7 @@
     validate_max_blocked_nodes,
     validate_resource_group_parameter,
     validate_location_resource_group_cluster_parameters,
-    # validate_agent_config_file,
+    validate_agent_config_file,
 )
 from azext_aks_preview.azurecontainerstorage._consts import (
     CONST_ACSTOR_ALL,
@@ -2780,70 +2781,69 @@ def load_arguments(self, _):
             action="store_true",
         )
 
-# pylint: disable=line-too-long
-#     with self.argument_context("aks agent") as c:
-#         c.positional(
-#             "prompt",
-#             help="Ask any question and answer using available tools.",
-#         )
-#         c.argument(
-#             "resource_group_name",
-#             options_list=["--resource-group", "-g"],
-#             help="Name of resource group.",
-#             required=False,
-#         )
-#         c.argument(
-#             "name",
-#             options_list=["--name", "-n"],
-#             help="Name of the managed cluster.",
-#             required=False,
-#         )
-#         c.argument(
-#             "max_steps",
-#             type=int,
-#             default=10,
-#             required=False,
-#             help="Maximum number of steps the LLM can take to investigate the issue.",
-#         )
-#         c.argument(
-#             "config_file",
-#             default=os.path.join(get_config_dir(), "aksAgent.config"),
-#             validator=validate_agent_config_file,
-#             required=False,
-#             help="Path to the config file.",
-#         )
-#         c.argument(
-#             "model",
-#             help="The model to use for the LLM.",
-#             required=False,
-#             type=str,
-#         )
-#         c.argument(
-#             "api-key",
-#             help="API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY)",
-#             required=False,
-#             type=str,
-#         )
-#         c.argument(
-#             "no_interactive",
-#             help="Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.",
-#             action="store_true",
-#         )
-#         c.argument(
-#             "no_echo_request",
-#             help="Disable echoing back the question provided to AKS Agent in the output.",
-#             action="store_true",
-#         )
-#         c.argument(
-#             "show_tool_output",
-#             help="Show the output of each tool that was called.",
-#             action="store_true",
-#         )
-#         c.argument(
-#             "refresh_toolsets",
-#             help="Refresh the toolsets status.",
-#             action="store_true",
-#         )
+    with self.argument_context("aks agent") as c:
+        c.positional(
+            "prompt",
+            help="Ask any question and answer using available tools.",
+        )
+        c.argument(
+            "resource_group_name",
+            options_list=["--resource-group", "-g"],
+            help="Name of resource group.",
+            required=False,
+        )
+        c.argument(
+            "name",
+            options_list=["--name", "-n"],
+            help="Name of the managed cluster.",
+            required=False,
+        )
+        c.argument(
+            "max_steps",
+            type=int,
+            default=10,
+            required=False,
+            help="Maximum number of steps the LLM can take to investigate the issue.",
+        )
+        c.argument(
+            "config_file",
+            default=os.path.join(get_config_dir(), CONST_AGENT_CONFIG_FILE_NAME),
+            validator=validate_agent_config_file,
+            required=False,
+            help="Path to the config file.",
+        )
+        c.argument(
+            "model",
+            help="The model to use for the LLM.",
+            required=False,
+            type=str,
+        )
+        c.argument(
+            "api-key",
+            help="API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY)",
+            required=False,
+            type=str,
+        )
+        c.argument(
+            "no_interactive",
+            help="Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.",
+            action="store_true",
+        )
+        c.argument(
+            "no_echo_request",
+            help="Disable echoing back the question provided to AKS Agent in the output.",
+            action="store_true",
+        )
+        c.argument(
+            "show_tool_output",
+            help="Show the output of each tool that was called.",
+            action="store_true",
+        )
+        c.argument(
+            "refresh_toolsets",
+            help="Refresh the toolsets status.",
+            action="store_true",
+        )
 
 
 def _get_default_install_location(exe_name):
-Original file line number
+Diff line change
@@ Expand Up @@
     Pending
     +++++++
+18.0.0b28
+    +++++++
+    * Add interactive AI-powered debugging tool `az aks agent`.
18.0.0b27
     +++++++
     * Add framework for interactive AI-powered debugging tool.
@@ Expand Down @@