Azure
diff --git a/src/aks-agent/HISTORY.rst b/src/aks-agent/HISTORY.rst
Lines changed: 4 additions & 0 deletions
diff --git a/src/aks-agent/azext_aks_agent/__init__.py b/src/aks-agent/azext_aks_agent/__init__.py
Lines changed: 0 additions & 23 deletions
diff --git a/src/aks-agent/azext_aks_agent/_consts.py b/src/aks-agent/azext_aks_agent/_consts.py
Lines changed: 13 additions & 3 deletions
diff --git a/src/aks-agent/azext_aks_agent/_help.py b/src/aks-agent/azext_aks_agent/_help.py
Lines changed: 33 additions & 85 deletions
diff --git a/src/aks-agent/azext_aks_agent/_params.py b/src/aks-agent/azext_aks_agent/_params.py
Lines changed: 7 additions & 27 deletions
diff --git a/src/aks-agent/azext_aks_agent/_validators.py b/src/aks-agent/azext_aks_agent/_validators.py
Lines changed: 0 additions & 53 deletions
@@ -12,6 +12,10 @@ To release a new version, please select a new version number (usually plus 1 to
 Pending
 +++++++
 
+1.0.0b12
+++++++++
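+* Feat(agent): add `--init` and `--cleanup` to `az aks agent` to deploy and remove the AKS agent on the cluster; remove the `--api-key`, `--config-file` and `--aks-mcp` options.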
+
 1.0.0b11
 ++++++++
 * Fix(agent-init): replace max_tokens with max_completion_tokens for connection check of Azure OpenAI service.
 
@@ -4,20 +4,8 @@
 # --------------------------------------------------------------------------------------------
 
 
-import os
-
 # pylint: disable=unused-import
-import azext_aks_agent._help
-from azext_aks_agent._consts import (
-    CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY,
-    CONST_AGENT_NAME,
-    CONST_AGENT_NAME_ENV_KEY,
-    CONST_DISABLE_PROMETHEUS_TOOLSET_ENV_KEY,
-    CONST_PRIVACY_NOTICE_BANNER,
-    CONST_PRIVACY_NOTICE_BANNER_ENV_KEY,
-)
 from azure.cli.core import AzCommandsLoader
-from azure.cli.core.api import get_config_dir
 
 
 class ContainerServiceCommandsLoader(AzCommandsLoader):
@@ -44,14 +32,3 @@ def load_arguments(self, command):
 
 
 COMMAND_LOADER_CLS = ContainerServiceCommandsLoader
-
-
-# NOTE(mainred): holmesgpt leverages the environment variables to customize its behavior.
-def customize_holmesgpt():
-    os.environ[CONST_DISABLE_PROMETHEUS_TOOLSET_ENV_KEY] = "true"
-    os.environ[CONST_AGENT_CONFIG_PATH_DIR_ENV_KEY] = get_config_dir()
-    os.environ[CONST_AGENT_NAME_ENV_KEY] = CONST_AGENT_NAME
-    os.environ[CONST_PRIVACY_NOTICE_BANNER_ENV_KEY] = CONST_PRIVACY_NOTICE_BANNER
-
-
-customize_holmesgpt()
@@ -30,6 +30,16 @@
 CONST_MCP_GITHUB_REPO = "Azure/aks-mcp"
 CONST_MCP_BINARY_DIR = "bin"
 
-# Color constants for terminal output
-HELP_COLOR = "cyan"  # same as AI_COLOR for now
-ERROR_COLOR = "red"
+# Kubernetes WebSocket exec protocol constants
+RESIZE_CHANNEL = 4  # WebSocket channel for terminal resize messages
+# WebSocket heartbeat configuration (matching kubectl client-go)
+# Based on kubernetes/client-go/tools/remotecommand/websocket.go#L59-L65
+# pingPeriod = 5 * time.Second
+# pingReadDeadline = (pingPeriod * 12) + (1 * time.Second)
+# The read deadline is calculated to allow up to 12 missed pings plus 1 second buffer
+# This provides tolerance for network delays while detecting actual connection failures
+HEARTBEAT_INTERVAL = 5.0                              # pingPeriod: 5 seconds between pings
+HEARTBEAT_TIMEOUT = (HEARTBEAT_INTERVAL * 12) + 1    # pingReadDeadline: 61 seconds total timeout
+
+AGENT_NAMESPACE = "kube-system"
+AGENT_LABEL_SELECTOR = "app.kubernetes.io/name=aks-agent"
@@ -5,7 +5,6 @@
 # --------------------------------------------------------------------------------------------
 
 # pylint: disable=too-many-lines
-
 from knack.help_files import helps
 
 helps[
@@ -15,14 +14,19 @@
     short-summary: Run AI assistant to analyze and troubleshoot Kubernetes clusters.
     long-summary: |-
       This command allows you to ask questions about your Azure Kubernetes cluster and get answers using AI models.
-      No need to manually set environment variables! All model and credential information can be configured interactively using `az aks agent-init` or via a config file.
     parameters:
         - name: --name -n
           type: string
           short-summary: Name of the managed cluster.
         - name: --resource-group -g
           type: string
           short-summary: Name of the resource group.
+        - name: --init
+          type: bool
+          short-summary: Initialize and deploy the AKS agent to the cluster.
+          long-summary: |-
+            Run the interactive initialization wizard to configure LLM settings, cluster role permissions,
+            and deploy the AKS agent Helm chart to your cluster. Required when first setting up the agent.
         - name: --model
           type: string
           short-summary: Specify the LLM provider and model or deployment to use for the AI assistant.
@@ -33,12 +37,6 @@
             Each provider may require different environment variables and model naming conventions.
             For a full list of supported providers, model patterns, and required environment variables, see https://docs.litellm.ai/docs/providers.
             Note: For Azure OpenAI, it is recommended to set the deployment name as the model name until https://github.com/BerriAI/litellm/issues/13950 is resolved.
-        - name: --api-key
-          type: string
-          short-summary: API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY). (Deprecated)
-        - name: --config-file
-          type: string
-          short-summary: Path to configuration file.
         - name: --max-steps
           type: int
           short-summary: Maximum number of steps the LLM can take to investigate the issue.
@@ -56,93 +54,43 @@
           short-summary: Refresh the toolsets status.
         - name: --status
           type: bool
-          short-summary: Show AKS agent configuration and status information.
-        - name: --aks-mcp
+          short-summary: Show AKS agent deployment status including helm release, deployments, and pod information.
+        - name: --cleanup
           type: bool
-          short-summary: Enable AKS MCP integration for enhanced capabilities. Traditional mode is the default.
+          short-summary: Uninstall the AKS agent and delete all associated resources from the cluster.
 
     examples:
-        - name: Ask about pod issues in the cluster with last configured model
+        - name: Initialize and deploy AKS agent to a cluster
           text: |-
-            az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup
-        - name: Ask about pod issues in the cluster with Azure OpenAI
+            az aks agent --init --resource-group myResourceGroup --name myAKSCluster
+        - name: Check AKS agent deployment status
           text: |-
-            az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/gpt-4.1
+            az aks agent --status
         - name: Ask about pod issues in the cluster with OpenAI
           text: |-
-            az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
-        - name: Run agent with config file
-          text: |
-            az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --name MyManagedCluster --resource-group MyResourceGroup
-            Here is an example of config file:
-            ```json
-            llms:
-            - provider: azure
-              MODEL_NAME: gpt-4.1
-              AZURE_API_KEY: *******
-              AZURE_API_BASE: https://{azure-openai-service-name}.openai.azure.com/
-              AZURE_API_VERSION: 2025-04-01-preview
-            # define a list of mcp servers, mcp server can be defined
-            mcp_servers:
-              aks_mcp:
-                description: "The AKS-MCP is a Model Context Protocol (MCP) server that enables AI assistants to interact with Azure Kubernetes Service (AKS) clusters"
-                url: "http://localhost:8003/sse"
-
-            # try adding your own tools or toggle the built-in toolsets here
-            # e.g. query company-specific data, fetch logs from your existing observability tools, etc
-            # To check how to add a customized toolset, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/custom_toolsets.html#custom-toolsets
-            # To find all built-in toolsets, please refer to https://docs.robusta.dev/master/configuration/holmesgpt/builtin_toolsets.html
-            toolsets:
-              # add a new json processor toolset
-              json_processor:
-                description: "A toolset for processing JSON data using jq"
-                prerequisites:
-                  - command: "jq --version"  # Ensure jq is installed
-                tools:
-                  - name: "process_json"
-                    description: "A tool that uses jq to process JSON input"
-                    command: "echo '{{ json_input }}' | jq '.'"  # Example jq command to format JSON
-              # disable a built-in toolsets
-              aks/core:
-                enabled: false
-              ```
-        - name: Run in interactive mode without a question
-          text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/gpt-4.1 --api-key "sk-xxx"
-        - name: Run in non-interactive batch mode
-          text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/gpt-4.1
-        - name: Show detailed tool output during analysis
-          text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/gpt-4.1
-        - name: Use custom configuration file
-          text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/gpt-4.1
-        - name: Run agent with no echo of the original question
-          text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/gpt-4.1
-        - name: Refresh toolsets to get the latest available tools
-          text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/gpt-4.1
-        - name: Show agent status (MCP readiness)
-          text: az aks agent --status
+            az aks agent "Why are my pods not starting?" --model gpt-4o
+        - name: Ask about pod issues in the cluster with last configured model
+          text: |-
+            az aks agent "Why are my pods not starting?"
+        - name: Ask about pod issues in the cluster with Azure OpenAI
+          text: |-
+            az aks agent "Why are my pods not starting?" --model azure/gpt-4.1
-        - name: Run in interactive mode without a question
+        - name: Run in interactive mode with an initial question
-          text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
+          text: |-
+            az aks agent "Check the pod status in my cluster" --model azure/gpt-4.1
         - name: Run in non-interactive batch mode
-          text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
+          text: |-
+            az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/gpt-4.1
         - name: Show detailed tool output during analysis
-          text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
-        - name: Use custom configuration file
-          text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/my-gpt4.1-deployment
+          text: |-
+            az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/gpt-4.1
         - name: Run agent with no echo of the original question
-          text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
+          text: |-
+            az aks agent "What is the status of my cluster?" --no-echo-request --model azure/gpt-4.1
         - name: Refresh toolsets to get the latest available tools
-          text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deployment
-"""
-
-helps[
-    "aks agent-init"
-] = """
-    type: command
-    short-summary: Initialize and validate LLM provider/model configuration for AKS agent.
-    long-summary: |-
-      This command interactively guides you to select an LLM provider and model, validates the connection, and saves the configuration for later use.
-      You can run this command multiple times to add or update different model configurations.
-    examples:
-        - name: Initialize configuration for Azure OpenAI, OpenAI or other llms
-          text: az aks agent-init
+          text: |-
+            az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/gpt-4.1
+        - name: Cleanup and uninstall AKS agent from the cluster
+          text: |-
+            az aks agent --cleanup
 """
@@ -4,14 +4,6 @@
 # --------------------------------------------------------------------------------------------
 
 # pylint: disable=too-many-statements,too-many-lines
-import os.path
-
-from azext_aks_agent._consts import CONST_AGENT_CONFIG_FILE_NAME
-from azext_aks_agent._validators import validate_agent_config_file
-from azure.cli.core.api import get_config_dir
-from azure.cli.core.commands.parameters import get_three_state_flag
-
-
 def load_arguments(self, _):
     with self.argument_context("aks agent") as c:
         c.positional(
@@ -40,24 +32,17 @@ def load_arguments(self, _):
             help="Maximum number of steps the LLM can take to investigate the issue.",
         )
         c.argument(
-            "config_file",
-            default=os.path.join(get_config_dir(), CONST_AGENT_CONFIG_FILE_NAME),
-            validator=validate_agent_config_file,
+            "init",
             required=False,
-            help="Path to the config file.",
+            help="Initialize the LLM configuration and the aks-agent environment on the AKS cluster.",
+            action="store_true",
         )
         c.argument(
             "model",
             help=" Specify the LLM provider and model or deployment to use for the AI assistant.",
             required=False,
             type=str,
         )
-        c.argument(
-            "api_key",
-            help="API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY)",
-            required=False,
-            type=str,
-        )
         c.argument(
             "no_interactive",
             help="Disable interactive mode. When set, the agent will not prompt for input and will run in batch mode.",
@@ -85,13 +70,8 @@ def load_arguments(self, _):
             help="Show AKS agent configuration and status information.",
         )
         c.argument(
-            "use_aks_mcp",
-            options_list=["--aks-mcp"],
-            default=False,
-            arg_type=get_three_state_flag(),
-            help=(
-                "Enable AKS MCP integration for enhanced capabilities. "
-                "Traditional mode is the default. Use --aks-mcp to enable MCP mode, or "
-                "--no-aks-mcp to explicitly disable it."
-            ),
+            "cleanup",
+            options_list=["--cleanup"],
+            action="store_true",
+            help="Remove aks-agent resources on the AKS cluster.",
         )