Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/aks-preview/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ To release a new version, please select a new version number (usually plus 1 to
Pending
+++++++

18.0.0b28
+++++++
* Add interactive AI-powered debugging tool `az aks agent`.

18.0.0b27
+++++++
* Add framework for interactive AI-powered debugging tool.
Expand Down
2 changes: 1 addition & 1 deletion src/aks-preview/azext_aks_preview/_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,4 +378,4 @@
# Constants for the `az aks agent` command: environment-variable keys, the
# agent display name, and the config file name.
# NOTE(review): the code that reads these environment keys is not visible in
# this hunk — confirm how they are consumed before relying on this summary.
CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY = "HOLMES_CONFIGPATH_DIR"
CONST_AGENT_NAME = "AKS AGENT"
CONST_AGENT_NAME_ENV_KEY = "AGENT_NAME"
# The next two lines are the before/after sides of this hunk's single change
# (1 addition, 1 deletion): the config file name moves from a `.config` to a
# `.yaml` extension. As written, the later assignment is the one in effect.
CONST_AGENT_CONFIG_FILE_NAME = "aksAgent.config"
CONST_AGENT_CONFIG_FILE_NAME = "aksAgent.yaml"
192 changes: 95 additions & 97 deletions src/aks-preview/azext_aks_preview/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -3953,100 +3953,98 @@
short-summary: Name of the identity binding to show.
"""

# pylint: disable=line-too-long
# helps[
# "aks agent"
# ] = """
# type: command
# short-summary: Run AI assistant to analyze and troubleshoot Kubernetes clusters.
# long-summary: |-
# This command allows you to ask questions about your Azure Kubernetes cluster and get answers using AI models.
# Environment variables must be set to use the AI model, please refer to https://docs.litellm.ai/docs/providers to learn more about supported AI providers and models and required environment variables.
# parameters:
# - name: --name -n
# type: string
# short-summary: Name of the managed cluster.
# - name: --resource-group -g
# type: string
# short-summary: Name of the resource group.
# - name: --model
# type: string
# short-summary: Model to use for the LLM.
# - name: --api-key
# type: string
# short-summary: API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY).
# - name: --config-file
# type: string
# short-summary: Path to configuration file.
# - name: --max-steps
# type: int
# short-summary: Maximum number of steps the LLM can take to investigate the issue.
# - name: --no-interactive
# type: bool
# short-summary: Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.
# - name: --no-echo-request
# type: bool
# short-summary: Disable echoing back the question provided to AKS Agent in the output.
# - name: --show-tool-output
# type: bool
# short-summary: Show the output of each tool that was called during the analysis.
# - name: --refresh-toolsets
# type: bool
# short-summary: Refresh the toolsets status.
#
# examples:
# - name: Ask about pod issues in the cluster with Azure OpenAI
# text: |-
# export AZURE_API_BASE="https://my-azureopenai-service.openai.azure.com/"
# export AZURE_API_VERSION="2025-01-01-preview"
# export AZURE_API_KEY="sk-xxx"
# az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment
# - name: Ask about pod issues in the cluster with OpenAI
# text: |-
# export OPENAI_API_KEY="sk-xxx"
# az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
# text: az aks agent "Why are my pods not starting?"
# - name: Run in interactive mode without a question
# text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
# - name: Run in non-interactive batch mode
# text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
# - name: Show detailed tool output during analysis
# text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
# - name: Use custom configuration file
# text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.config --model azure/my-gpt4.1-deployment
# - name: Run agent with no echo of the original question
# text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
# - name: Refresh toolsets to get the latest available tools
# text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deploymen
# - name: Run agent with config file
# text: |
# az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.config
# Here is an example of config file:
# ```json
# model: "gpt-4o"
# api_key: "..."
# # define a list of mcp servers, mcp server can be defined
# mcp_servers:
# aks_mcp:
# description: "The AKS-MCP is a Model Context Protocol (MCP) server that enables AI assistants to interact with Azure Kubernetes Service (AKS) clusters"
# url: "http://localhost:8003/sse"
#
# # try adding your own tools or toggle the built-in toolsets here
# # e.g. query company-specific data, fetch logs from your existing observability tools, etc
# # To check how to add a customized toolset, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/custom_toolsets.html#custom-toolsets
# # To find all built-in toolsets, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/builtin_toolsets.html
# toolsets:
# # add a new json processor toolset
# json_processor:
# description: "A toolset for processing JSON data using jq"
# prerequisites:
# - command: "jq --version" # Ensure jq is installed
# tools:
# - name: "process_json"
# description: "A tool that uses jq to process JSON input"
# command: "echo '{{ json_input }}' | jq '.'" # Example jq command to format JSON
# # disable a built-in toolsets
# aks/core:
# enabled: false
# ```
# """
# Help entry for `az aks agent`.
# The value is a YAML document, so indentation inside the string is
# significant: block scalars (`|` / `|-`) keep their indented lines verbatim,
# including the `#` lines of the embedded config sample.
# The config sample is YAML (the default config file is `aksAgent.yaml`), so
# its fence is labelled `yaml`, and every example title describes the command
# shown beneath it.
helps[
    "aks agent"
] = """
    type: command
    short-summary: Run AI assistant to analyze and troubleshoot Kubernetes clusters.
    long-summary: |-
      This command allows you to ask questions about your Azure Kubernetes cluster and get answers using AI models.
      Environment variables must be set to use the AI model, please refer to https://docs.litellm.ai/docs/providers to learn more about supported AI providers and models and required environment variables.
    parameters:
        - name: --name -n
          type: string
          short-summary: Name of the managed cluster.
        - name: --resource-group -g
          type: string
          short-summary: Name of the resource group.
        - name: --model
          type: string
          short-summary: Model to use for the LLM.
        - name: --api-key
          type: string
          short-summary: API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY).
        - name: --config-file
          type: string
          short-summary: Path to configuration file.
        - name: --max-steps
          type: int
          short-summary: Maximum number of steps the LLM can take to investigate the issue.
        - name: --no-interactive
          type: bool
          short-summary: Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.
        - name: --no-echo-request
          type: bool
          short-summary: Disable echoing back the question provided to AKS Agent in the output.
        - name: --show-tool-output
          type: bool
          short-summary: Show the output of each tool that was called during the analysis.
        - name: --refresh-toolsets
          type: bool
          short-summary: Refresh the toolsets status.

    examples:
        - name: Ask about pod issues in the cluster with Azure OpenAI
          text: |-
            export AZURE_API_BASE="https://my-azureopenai-service.openai.azure.com/"
            export AZURE_API_VERSION="2025-01-01-preview"
            export AZURE_API_KEY="sk-xxx"
            az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment
        - name: Ask about pod issues in the cluster with OpenAI
          text: |-
            export OPENAI_API_KEY="sk-xxx"
            az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
        - name: Pass the API key on the command line instead of an environment variable
          text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
        - name: Run in non-interactive batch mode
          text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
        - name: Show detailed tool output during analysis
          text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
        - name: Use custom configuration file
          text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/my-gpt4.1-deployment
        - name: Run agent with no echo of the original question
          text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
        - name: Refresh toolsets to get the latest available tools
          text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deployment
        - name: Run agent with config file
          text: |
            az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml
            Here is an example of config file:
            ```yaml
            model: "gpt-4o"
            api_key: "..."
            # define a list of mcp servers, mcp server can be defined
            mcp_servers:
              aks_mcp:
                description: "The AKS-MCP is a Model Context Protocol (MCP) server that enables AI assistants to interact with Azure Kubernetes Service (AKS) clusters"
                url: "http://localhost:8003/sse"

            # try adding your own tools or toggle the built-in toolsets here
            # e.g. query company-specific data, fetch logs from your existing observability tools, etc
            # To check how to add a customized toolset, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/custom_toolsets.html#custom-toolsets
            # To find all built-in toolsets, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/builtin_toolsets.html
            toolsets:
              # add a new json processor toolset
              json_processor:
                description: "A toolset for processing JSON data using jq"
                prerequisites:
                  - command: "jq --version"  # Ensure jq is installed
                tools:
                  - name: "process_json"
                    description: "A tool that uses jq to process JSON input"
                    command: "echo '{{ json_input }}' | jq '.'"  # Example jq command to format JSON
              # disable a built-in toolsets
              aks/core:
                enabled: false
            ```
"""
134 changes: 67 additions & 67 deletions src/aks-preview/azext_aks_preview/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
validate_nat_gateway_idle_timeout,
validate_nat_gateway_managed_outbound_ip_count,
)
# from azure.cli.core.api import get_config_dir
from azure.cli.core.api import get_config_dir
from azure.cli.core.commands.parameters import (
edge_zone_type,
file_type,
Expand Down Expand Up @@ -150,7 +150,8 @@
CONST_ADVANCED_NETWORKPOLICIES_FQDN,
CONST_ADVANCED_NETWORKPOLICIES_L7,
CONST_TRANSIT_ENCRYPTION_TYPE_NONE,
CONST_TRANSIT_ENCRYPTION_TYPE_WIREGUARD
CONST_TRANSIT_ENCRYPTION_TYPE_WIREGUARD,
CONST_AGENT_CONFIG_FILE_NAME,
)

from azext_aks_preview._validators import (
Expand Down Expand Up @@ -224,7 +225,7 @@
validate_max_blocked_nodes,
validate_resource_group_parameter,
validate_location_resource_group_cluster_parameters,
# validate_agent_config_file,
validate_agent_config_file,
)
from azext_aks_preview.azurecontainerstorage._consts import (
CONST_ACSTOR_ALL,
Expand Down Expand Up @@ -2780,70 +2781,69 @@ def load_arguments(self, _):
action="store_true",
)

# pylint: disable=line-too-long
# with self.argument_context("aks agent") as c:
# c.positional(
# "prompt",
# help="Ask any question and answer using available tools.",
# )
# c.argument(
# "resource_group_name",
# options_list=["--resource-group", "-g"],
# help="Name of resource group.",
# required=False,
# )
# c.argument(
# "name",
# options_list=["--name", "-n"],
# help="Name of the managed cluster.",
# required=False,
# )
# c.argument(
# "max_steps",
# type=int,
# default=10,
# required=False,
# help="Maximum number of steps the LLM can take to investigate the issue.",
# )
# c.argument(
# "config_file",
# default=os.path.join(get_config_dir(), "aksAgent.config"),
# validator=validate_agent_config_file,
# required=False,
# help="Path to the config file.",
# )
# c.argument(
# "model",
# help="The model to use for the LLM.",
# required=False,
# type=str,
# )
# c.argument(
# "api-key",
# help="API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY)",
# required=False,
# type=str,
# )
# c.argument(
# "no_interactive",
# help="Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.",
# action="store_true",
# )
# c.argument(
# "no_echo_request",
# help="Disable echoing back the question provided to AKS Agent in the output.",
# action="store_true",
# )
# c.argument(
# "show_tool_output",
# help="Show the output of each tool that was called.",
# action="store_true",
# )
# c.argument(
# "refresh_toolsets",
# help="Refresh the toolsets status.",
# action="store_true",
# )
# Argument registrations for the `az aks agent` command.
# Each entry is keyed by the destination (underscore) name; the user-facing
# flag is derived from it unless `options_list` overrides the spelling.
with self.argument_context("aks agent") as c:
    # The free-form question is the command's only positional argument.
    c.positional(
        "prompt",
        help="Ask any question and answer using available tools.",
    )
    # Cluster coordinates are declared optional here (required=False).
    c.argument(
        "resource_group_name",
        options_list=["--resource-group", "-g"],
        help="Name of resource group.",
        required=False,
    )
    c.argument(
        "name",
        options_list=["--name", "-n"],
        help="Name of the managed cluster.",
        required=False,
    )
    c.argument(
        "max_steps",
        type=int,
        default=10,
        required=False,
        help="Maximum number of steps the LLM can take to investigate the issue.",
    )
    # Defaults to the agent config file inside the Azure CLI config directory;
    # the supplied path is passed through validate_agent_config_file.
    c.argument(
        "config_file",
        default=os.path.join(get_config_dir(), CONST_AGENT_CONFIG_FILE_NAME),
        validator=validate_agent_config_file,
        required=False,
        help="Path to the config file.",
    )
    c.argument(
        "model",
        help="The model to use for the LLM.",
        required=False,
        type=str,
    )
    # Registered under the underscore name like every other entry in this
    # block. The previous "api-key" spelling presumably never matched the
    # handler's keyword parameter, leaving this help/type unapplied.
    # NOTE(review): confirm against the `aks agent` handler signature.
    c.argument(
        "api_key",
        help="API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY)",
        required=False,
        type=str,
    )
    # Boolean switches: present means True.
    c.argument(
        "no_interactive",
        help="Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.",
        action="store_true",
    )
    c.argument(
        "no_echo_request",
        help="Disable echoing back the question provided to AKS Agent in the output.",
        action="store_true",
    )
    c.argument(
        "show_tool_output",
        help="Show the output of each tool that was called.",
        action="store_true",
    )
    c.argument(
        "refresh_toolsets",
        help="Refresh the toolsets status.",
        action="store_true",
    )


def _get_default_install_location(exe_name):
Expand Down
Loading
Loading