Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions linter_exclusions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,11 @@ aks update:
cluster_service_load_balancer_health_probe_mode:
rule_exclusions:
- option_length_too_long
aks agent:
parameters:
prompt:
rule_exclusions:
- no_positional_parameters
arcdata dc config init:
parameters:
path:
Expand Down
3 changes: 2 additions & 1 deletion src/aks-preview/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ To release a new version, please select a new version number (usually plus 1 to

Pending
+++++++
* Add framework for interactive AI-powered debugging tool.

18.0.0b26
+++++++
* Add `az aks identity-binding` command group for identity binding feataure.
* Add `az aks identity-binding` command group for identity binding feature.

18.0.0b25
+++++++
Expand Down
6 changes: 6 additions & 0 deletions src/aks-preview/azext_aks_preview/_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,3 +373,9 @@
CONST_K8S_EXTENSION_NAME = "k8s-extension"
CONST_K8S_EXTENSION_ACTION_MOD_NAME = "azext_k8s_extension.action"
CONST_K8S_EXTENSION_FORMAT_MOD_NAME = "azext_k8s_extension._format"

# aks agent constants
# Environment-variable key; the "HOLMES_" prefix suggests the underlying agent
# reads it to locate its config directory -- TODO confirm against the consumer.
CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY = "HOLMES_CONFIGPATH_DIR"
# Presumably the display name of the agent -- TODO confirm where it is shown.
CONST_AGENT_NAME = "AKS AGENT"
# Environment-variable key; presumably used to hand CONST_AGENT_NAME to the agent -- TODO confirm.
CONST_AGENT_NAME_ENV_KEY = "AGENT_NAME"
# File name of the agent config file; joined with get_config_dir() to form the
# default config file path.
CONST_AGENT_CONFIG_FILE_NAME = "aksAgent.config"
98 changes: 98 additions & 0 deletions src/aks-preview/azext_aks_preview/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -3943,3 +3943,101 @@
type: string
short-summary: Name of the identity binding to show.
"""

# pylint: disable=line-too-long
# helps[
# "aks agent"
# ] = """
# type: command
# short-summary: Run AI assistant to analyze and troubleshoot Kubernetes clusters.
# long-summary: |-
# This command allows you to ask questions about your Azure Kubernetes cluster and get answers using AI models.
# Environment variables must be set to use the AI model, please refer to https://docs.litellm.ai/docs/providers to learn more about supported AI providers and models and required environment variables.
# parameters:
# - name: --name -n
# type: string
# short-summary: Name of the managed cluster.
# - name: --resource-group -g
# type: string
# short-summary: Name of the resource group.
# - name: --model
# type: string
# short-summary: Model to use for the LLM.
# - name: --api-key
# type: string
# short-summary: API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY).
# - name: --config-file
# type: string
# short-summary: Path to configuration file.
# - name: --max-steps
# type: int
# short-summary: Maximum number of steps the LLM can take to investigate the issue.
# - name: --no-interactive
# type: bool
# short-summary: Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.
# - name: --no-echo-request
# type: bool
# short-summary: Disable echoing back the question provided to AKS Agent in the output.
# - name: --show-tool-output
# type: bool
# short-summary: Show the output of each tool that was called during the analysis.
# - name: --refresh-toolsets
# type: bool
# short-summary: Refresh the toolsets status.
#
# examples:
# - name: Ask about pod issues in the cluster with Azure OpenAI
# text: |-
# export AZURE_API_BASE="https://my-azureopenai-service.openai.azure.com/"
# export AZURE_API_VERSION="2025-01-01-preview"
# export AZURE_API_KEY="sk-xxx"
# az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment
# - name: Ask about pod issues in the cluster with OpenAI
# text: |-
# export OPENAI_API_KEY="sk-xxx"
# az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
# text: az aks agent "Why are my pods not starting?"
# - name: Run in interactive mode without a question
# text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
# - name: Run in non-interactive batch mode
# text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
# - name: Show detailed tool output during analysis
# text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
# - name: Use custom configuration file
# text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.config --model azure/my-gpt4.1-deployment
# - name: Run agent with no echo of the original question
# text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
# - name: Refresh toolsets to get the latest available tools
# text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deployment
# - name: Run agent with config file
# text: |
# az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.config
# Here is an example of config file:
# ```yaml
# model: "gpt-4o"
# api_key: "..."
# # define a list of mcp servers, mcp server can be defined
# mcp_servers:
# aks_mcp:
# description: "The AKS-MCP is a Model Context Protocol (MCP) server that enables AI assistants to interact with Azure Kubernetes Service (AKS) clusters"
# url: "http://localhost:8003/sse"
#
# # try adding your own tools or toggle the built-in toolsets here
# # e.g. query company-specific data, fetch logs from your existing observability tools, etc
# # To check how to add a customized toolset, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/custom_toolsets.html#custom-toolsets
# # To find all built-in toolsets, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/builtin_toolsets.html
# toolsets:
# # add a new json processor toolset
# json_processor:
# description: "A toolset for processing JSON data using jq"
# prerequisites:
# - command: "jq --version" # Ensure jq is installed
# tools:
# - name: "process_json"
# description: "A tool that uses jq to process JSON input"
# command: "echo '{{ json_input }}' | jq '.'" # Example jq command to format JSON
# # disable a built-in toolsets
# aks/core:
# enabled: false
# ```
# """
67 changes: 67 additions & 0 deletions src/aks-preview/azext_aks_preview/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
validate_nat_gateway_idle_timeout,
validate_nat_gateway_managed_outbound_ip_count,
)
# from azure.cli.core.api import get_config_dir
from azure.cli.core.commands.parameters import (
edge_zone_type,
file_type,
Expand Down Expand Up @@ -223,6 +224,7 @@
validate_max_blocked_nodes,
validate_resource_group_parameter,
validate_location_resource_group_cluster_parameters,
# validate_agent_config_file,
)
from azext_aks_preview.azurecontainerstorage._consts import (
CONST_ACSTOR_ALL,
Expand Down Expand Up @@ -2775,6 +2777,71 @@ def load_arguments(self, _):
action="store_true",
)

# pylint: disable=line-too-long
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May I ask why these lines of code need to be commented out? If the parameters should be removed, please delete them directly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We commented them out because this code will be reintroduced in a week or so.
The feature is currently in private preview, so we do not want to expose it to users until the private preview looks good.

Copy link
Member Author

@mainred mainred Aug 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing the agent subcommand from commands.py requires us to also remove the parameters and help entries to satisfy the style check. We want to keep as much code as possible, though.
We are keeping them here so that we can bring them back soon. If you are against commenting out so much code, I am fine with removing them directly.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. Just commenting them out temporarily is fine.

# with self.argument_context("aks agent") as c:
# c.positional(
# "prompt",
# help="Ask any question and answer using available tools.",
# )
# c.argument(
# "resource_group_name",
# options_list=["--resource-group", "-g"],
# help="Name of resource group.",
# required=False,
# )
# c.argument(
# "name",
# options_list=["--name", "-n"],
# help="Name of the managed cluster.",
# required=False,
# )
# c.argument(
# "max_steps",
# type=int,
# default=10,
# required=False,
# help="Maximum number of steps the LLM can take to investigate the issue.",
# )
# c.argument(
# "config_file",
# default=os.path.join(get_config_dir(), "aksAgent.config"),
# validator=validate_agent_config_file,
# required=False,
# help="Path to the config file.",
# )
# c.argument(
# "model",
# help="The model to use for the LLM.",
# required=False,
# type=str,
# )
# c.argument(
# "api_key",
# help="API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY)",
# required=False,
# type=str,
# )
# c.argument(
# "no_interactive",
# help="Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.",
# action="store_true",
# )
# c.argument(
# "no_echo_request",
# help="Disable echoing back the question provided to AKS Agent in the output.",
# action="store_true",
# )
# c.argument(
# "show_tool_output",
# help="Show the output of each tool that was called.",
# action="store_true",
# )
# c.argument(
# "refresh_toolsets",
# help="Refresh the toolsets status.",
# action="store_true",
# )


def _get_default_install_location(exe_name):
system = platform.system()
Expand Down
38 changes: 38 additions & 0 deletions src/aks-preview/azext_aks_preview/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
import os
import os.path
import re
import yaml
from ipaddress import ip_network
from math import isclose, isnan

from azure.cli.core import keys
from azure.cli.core.api import get_config_dir
from azure.cli.core.azclierror import (
ArgumentUsageError,
InvalidArgumentValueError,
Expand All @@ -35,6 +37,7 @@
CONST_NETWORK_POD_IP_ALLOCATION_MODE_STATIC_BLOCK,
CONST_NODEPOOL_MODE_GATEWAY,
CONST_AZURE_SERVICE_MESH_MAX_EGRESS_NAME_LENGTH,
CONST_AGENT_CONFIG_FILE_NAME,
)
from azext_aks_preview._helpers import _fuzzy_match
from knack.log import get_logger
Expand Down Expand Up @@ -977,3 +980,38 @@ def validate_location_resource_group_cluster_parameters(namespace):
raise MutuallyExclusiveArgumentError(
"Cannot specify --location and --resource-group and --cluster at the same time."
)


def _validate_param_yaml_file(yaml_path, param_name):
    """Check that ``yaml_path`` points to a readable, parseable YAML file.

    An empty or ``None`` path is treated as "not provided" and accepted
    without any further checks.

    :param yaml_path: Path of the file to validate.
    :param param_name: Option name, without the leading ``--``, used as the
        prefix of every error message.
    :raises InvalidArgumentValueError: if the file does not exist, is not
        readable, does not contain valid YAML, or cannot be read for any
        other reason. The original exception is attached as ``__cause__``.
    """
    if not yaml_path:
        return
    if not os.path.exists(yaml_path):
        raise InvalidArgumentValueError(
            f"--{param_name}={yaml_path}: file is not found."
        )
    if not os.access(yaml_path, os.R_OK):
        raise InvalidArgumentValueError(
            f"--{param_name}={yaml_path}: file is not readable."
        )
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(yaml_path, "r", encoding="utf-8") as file:
            # Parsed only to prove the content is well-formed; the result is discarded.
            yaml.safe_load(file)
    except yaml.YAMLError as e:
        raise InvalidArgumentValueError(
            f"--{param_name}={yaml_path}: file is not a valid YAML file: {e}"
        ) from e
    except Exception as e:  # pylint: disable=broad-except
        # Deliberately broad: any read failure (I/O, decoding, ...) is reported
        # to the user as an invalid argument value instead of a raw traceback.
        raise InvalidArgumentValueError(
            f"--{param_name}={yaml_path}: An error occurred while reading the config file: {e}"
        ) from e


def validate_agent_config_file(namespace):
    """Validate the config file path stored in ``namespace.config_file``.

    Nothing is checked when no path is set, or when the path equals the
    default location (``CONST_AGENT_CONFIG_FILE_NAME`` inside the directory
    returned by ``get_config_dir()``) and no file exists there. Every other
    path is validated as a YAML file under the option name ``config-file``.

    :param namespace: Parsed argument namespace exposing ``config_file``.
    """
    path = namespace.config_file
    if path:
        fallback = os.path.join(get_config_dir(), CONST_AGENT_CONFIG_FILE_NAME)
        # A missing file at the default location is not an error.
        default_is_absent = path == fallback and not os.path.exists(path)
        if not default_is_absent:
            _validate_param_yaml_file(path, "config-file")
Empty file.
Loading
Loading