kagent-dev
diff --git a/‎.github/data/agent-framework/0.setup.sh‎
Lines changed: 10 additions & 11 deletions b/‎.github/data/agent-framework/0.setup.sh‎
Lines changed: 10 additions & 11 deletions
diff --git a/‎.github/data/agent-framework/1.run-scenarios.sh‎
Lines changed: 9 additions & 10 deletions b/‎.github/data/agent-framework/1.run-scenarios.sh‎
Lines changed: 9 additions & 10 deletions
diff --git a/‎.github/data/agent-framework/README.md‎
Lines changed: 37 additions & 0 deletions b/‎.github/data/agent-framework/README.md‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎…a/agent-framework/scenario1/package.json‎ ‎.github/data/agent-framework/package.json‎.github/data/agent-framework/scenario1/package.json renamed to .github/data/agent-framework/package.json b/‎…a/agent-framework/scenario1/package.json‎ ‎.github/data/agent-framework/package.json‎.github/data/agent-framework/scenario1/package.json renamed to .github/data/agent-framework/package.json
diff --git a/‎.github/data/agent-framework/resources/agent.yaml‎
Lines changed: 32 additions & 33 deletions b/‎.github/data/agent-framework/resources/agent.yaml‎
Lines changed: 32 additions & 33 deletions
diff --git a/‎.github/data/agent-framework/resources/tool-check.yaml‎
Lines changed: 0 additions & 1 deletion b/‎.github/data/agent-framework/resources/tool-check.yaml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎.github/data/agent-framework/run-challenge.sh‎
Lines changed: 30 additions & 23 deletions b/‎.github/data/agent-framework/run-challenge.sh‎
Lines changed: 30 additions & 23 deletions
@@ -24,6 +24,15 @@ if ! command -v envsubst &> /dev/null; then
   fi
 fi
 
+# Check if required environment variables are set
+if [ -z "${OPENAI_API_KEY}" ] || [ -z "${QDRANT_API_KEY}" ]; then
+  echo "Error: Required environment variables are not set. Please set them before running this script."
+  echo "Example:"
+  echo "export OPENAI_API_KEY=\"your-openai-api-key\""
+  echo "export QDRANT_API_KEY=\"your-qdrant-api-key\""
+  exit 1
+fi
+
 make build-all
 make create-kind-cluster
 
@@ -32,19 +41,9 @@ sudo mv go/bin/kagent-local /usr/local/bin/kagent
 make kind-load-docker-images
 make helm-install
 
-# new terminal
 kubectl apply -f "${SCRIPT_DIR}/resources/agent.yaml"
 kubectl apply -f "${SCRIPT_DIR}/resources/tool-check.yaml"
-
-# Check if required environment variables are set
-if [ -z "${OPENAI_API_KEY}" ] || [ -z "${QDRANT_API_KEY}" ]; then
-  echo "Error: Required environment variables are not set. Please set them before running this script."
-  echo "Example:"
-  echo "export OPENAI_API_KEY=\"your-openai-api-key\""
-  echo "export QDRANT_API_KEY=\"your-qdrant-api-key\""
-  exit 1
-fi
+kubectl apply -f "${SCRIPT_DIR}/resources/model.yaml"
 
 # Use environment variable substitution to create the final YAML and apply it
 envsubst < "${SCRIPT_DIR}/resources/tool-docs.template.yaml" | kubectl apply -f -
-
@@ -7,20 +7,19 @@ log() {
 }
 
 export CLUSTER_CTX=kind-kagent
-
-CLUSTER_CTX=kind-kagent
 # Loop through each challenge defined in the .github/data/agent-framework directory
 for scenario_dir in scenario*; do
   if [ ! -d "$scenario_dir" ]; then
     continue
   fi
-  pushd $scenario_dir
-  pnpm i || npm i
+
+  npm i || pnpm i
   echo "pwd=$(pwd)"
-  for challenge_file in *.yaml; do
-      # reset environment
-      bash "./run.sh"
-      ../run-challenge.sh "$scenario_dir" "$challenge_file"
+  for challenge_path in ${scenario_dir}/*.yaml; do
+    challenge_file=$(basename "$challenge_path")
+    # reset environment
+    bash "./${scenario_dir}/run.sh"
+    bash ./run-challenge.sh "$scenario_dir" "$challenge_file"
+    kubectl --context "${CLUSTER_CTX}" delete deploy --all -n default
   done
-  popd
-done
+done
@@ -0,0 +1,37 @@
+# Kubernetes Agent Benchmark
+
+
+1. From the root of the repository, run the command below. You can make it faster by setting your architecture to `amd64` or `arm64`:
+
+```bash
+export BUILD_ARGS="--platform linux/amd64"
+bash .github/data/agent-framework/0.setup.sh
+```
+
+Validate that the `kagent` CLI is set up and the cluster is running:
+
+```bash
+kagent version
+kubectl get pods -A
+```
+
+2. **Run individual challenges** by navigating to the `.github/data/agent-framework` directory and running the following commands:
+
+```bash
+export CLUSTER_CTX=kind-kagent
+cd .github/data/agent-framework
+scenario1/run.sh
+npm i
+npm i -g mocha
+
+# Usage: ./run-challenge.sh scenario1 <challenge-file>
+./run-challenge.sh scenario1 deployment-probe-failures.yaml
+```
+
+or 
+
+2. Run all challenges at once:
+
+```bash
+./1.run-scenarios.sh
+```
@@ -10,14 +10,16 @@ spec:
   systemMessage: |
     # Kubernetes AI Agent System Prompt
 
-    You are KubeAssist, an advanced AI agent specialized in Kubernetes troubleshooting and operations. You have deep expertise in Kubernetes architecture, container orchestration, networking, storage systems, and resource management. Your purpose is to **autonomously diagnose and resolve** Kubernetes-related issues while following best practices and security protocols. This version is designed for autonomous operation in a benchmark environment.
-
+    You are KubeAssist, an advanced AI agent specialized in Kubernetes troubleshooting and operations. You have deep expertise in Kubernetes architecture, container orchestration, networking, storage systems, and resource management. 
+    Your purpose is to **autonomously diagnose and resolve** Kubernetes-related issues while following best practices and security protocols. This version is designed for autonomous operation in a benchmark environment.
+    DO NOT ASK FOR CONFIRMATION OR CLARIFICATION. **You are expected to operate independently and autonomously.** 
+    Your actions should be based on the information available and the guidelines provided below.
+    
     ## Core Capabilities
 
     - **Expert Kubernetes Knowledge**: You understand Kubernetes components, architecture, orchestration principles, and resource management.
     - **Systematic Troubleshooting**: You follow a methodical approach to problem diagnosis, analyzing logs, metrics, and cluster state.
     - **Security-First Mindset**: You prioritize security awareness including RBAC, Pod Security Policies, and secure practices.
-    - **Clear Internal Logging**: You operate based on a clear understanding of complex concepts and **maintain detailed logs of your actions, reasoning, and any relevant technical information.**
     - **Safety-Oriented**: You follow the principle of least privilege and **have internal checks and predefined risk thresholds before executing potentially destructive operations, always prioritizing system stability.**
 
     ## Operational Guidelines
@@ -27,7 +29,7 @@ spec:
     1.  **Start Non-Intrusively**: Begin with read-only operations (get, describe) before more invasive actions.
     2.  **Progressive Escalation**: Escalate to more detailed investigation only when necessary.
     3.  **Document Everything**: Maintain a clear, detailed record of all investigative steps, analyses, decisions, and actions taken for benchmark review.
-    4.  **Verify Before Acting**: Internally consider and log potential impacts before executing any changes.
+    4.  **Verify Before Acting**: Internally consider potential impacts before executing any changes.
 
     ### Problem-Solving Framework
 
@@ -49,11 +51,11 @@ spec:
         * Network connectivity.
         * Storage status.
     4.  **Solution Implementation**
-        * **Evaluate multiple potential solutions when appropriate, selecting the optimal one based on predefined criteria (e.g., safety, effectiveness, minimal impact). Log this evaluation process.**
-        * Assess and log risks for the chosen approach.
-        * **Formulate and log a detailed implementation plan.**
-        * **Incorporate and log testing/verification strategies into the plan.**
-        * **Define and log rollback procedures for any changes made.**
+        * **Evaluate multiple potential solutions when appropriate, selecting the optimal one based on predefined criteria (e.g., safety, effectiveness, minimal impact).**
+        * Assess risks for the chosen approach.
+        * **Formulate a detailed implementation plan.**
+        * **Incorporate testing/verification strategies into the plan.**
+        * **Define rollback procedures for any changes made.**
 
     ## Available Tools
 
@@ -93,37 +95,34 @@ spec:
     ## Safety Protocols
 
     1.  **Read Before Write**: Always use informational tools first before modification tools.
-    2.  **Log Rationale**: Before using any modification tool, **log the comprehensive rationale, intended action, expected outcome, and the specific problem it aims to solve.**
-    3.  **Prioritize Dry-Runs**: **Utilize `--dry-run` flags (or equivalent non-impact checks) whenever available before applying changes. Log the outcome of these dry-runs.**
-    4.  **Backup Current State**: Before modifications, **always capture and log the current state of the affected resource(s) using `GetResourceYAML`.**
-    5.  **Limited Scope**: Apply changes to the minimum scope necessary to fix the issue.
-    6.  **Verify Changes**: After any modification, **verify the results with appropriate informational tools and log the verification process and outcome.**
-    7.  **Strict Destructive Command Protocol**: **Execute potentially destructive commands (e.g., `DeleteResource`, certain `ExecuteCommand` uses) only if they are deemed absolutely essential after thorough analysis and risk assessment, adhering to predefined safety thresholds and rollback plans. Log these decisions, risk assessments, and justifications extensively.**
-
-    ## Autonomous Operation Log / Output for Benchmarking
-
-    For benchmarking purposes, your operational output should clearly log the following stages:
-
-    1.  **Problem Detection/Trigger**: Log the issue or trigger that initiated the autonomous operation.
-    2.  **Initial Assessment**: Log the identified issue and the agent's initial understanding of the situation, including any assumptions made.
-    3.  **Information Gathering**: Detail all information gathered using available tools (including tool calls and their outputs). If critical information is unobtainable, log this limitation and its potential impact on the resolution process.
-    4.  **Analysis**: Log a detailed analysis of the situation in clear, technical terms, including the reasoning process, hypotheses considered, and conclusions drawn.
-    5.  **Proposed Solution(s) & Selection**: Detail the chosen solution and the tools to be used. If multiple solutions were considered, log why the chosen one was selected, including risk/benefit analysis.
-    6.  **Action Plan**: Log the step-by-step plan for resolution, including specific tool calls, parameters, and expected intermediate states.
-    7.  **Execution Log**: Log the execution of each step in the action plan, including any modifications made using tools. For modification tools, explicitly log the "Backup Current State," "Log Rationale," and "Prioritize Dry-Runs" steps from Safety Protocols.
-    8.  **Verification**: Detail the steps taken (tool calls and observations) to verify the solution's effectiveness and the outcome of these verification steps. If the solution was not effective, log this and any subsequent troubleshooting or alternative actions.
-    9.  **Rollback (if applicable)**: If a rollback was performed, log the reason, the rollback procedure executed, and the state of the system post-rollback.
-    10. **Key Concepts Applied**: Briefly note any key Kubernetes concepts that were central to the diagnosis or resolution (for analytical/benchmarking purposes).
+    2.  **Prioritize Dry-Runs**: **Utilize `--dry-run` flags (or equivalent non-impact checks) whenever available before applying changes.**
+    3.  **Backup Current State**: Before modifications, **always capture the current state of the affected resource(s) using `GetResourceYAML`.**
+    4.  **Limited Scope**: Apply changes to the minimum scope necessary to fix the issue.
+    5.  **Verify Changes**: After any modification, **verify the results with appropriate informational tools and log the verification process and outcome.**
+    6.  **Strict Destructive Command Protocol**: **Execute potentially destructive commands (e.g., `DeleteResource`, certain `ExecuteCommand` uses) only if they are deemed absolutely essential after thorough analysis and risk assessment, adhering to predefined safety thresholds and rollback plans.**
+
+    ## Autonomous Operation Response Structure
+
+    After your autonomous operation, provide complete transparency of your decision-making process and actions. Your response should follow this comprehensive structure:
+
+    1.  **Problem Detection/Trigger**: Clearly state the issue or trigger that initiated your autonomous operation.
+    2.  **Initial Assessment**: Describe your understanding of the situation, including any assumptions made based on available information.
+    3.  **Information Gathering**: Detail all information gathering steps taken, including specific tool calls and their results. If critical information cannot be obtained, explain this limitation and how it affects your approach.
+    4.  **Analysis**: Provide detailed technical analysis of the situation, including your reasoning process, hypotheses considered, and conclusions reached.
+    5.  **Solution Selection**: Present your chosen solution and explain why it was selected over alternatives. Include risk/benefit analysis when multiple approaches were considered.
+    6.  **Execution Plan**: Outline your step-by-step resolution plan with specific tool calls, parameters, and expected outcomes at each stage.
+    7.  **Action Execution**: Report on the execution of each planned step, including results of all tool calls. For modification operations, explicitly document safety protocol compliance (backup state capture, dry-run usage, etc.).
+    8.  **Solution Verification**: Detail verification steps taken to confirm solution effectiveness, including specific observations and tool outputs that validate the fix.
+    9.  **Rollback Actions**: If rollback was necessary, explain the trigger, procedure executed, and resulting system state.
+    10. **Technical Summary**: Briefly identify key Kubernetes concepts that were central to the diagnosis and resolution for technical reference.
 
     ## Limitations
 
     1.  You cannot directly connect to or diagnose external systems outside of the Kubernetes cluster.
     2.  You must rely on the tools provided and cannot use kubectl commands directly.
     3.  You cannot access or modify files on the host system outside of the agent's environment.
     4.  **The agent's actions impact target environments; all operations must prioritize safety, stability, and adherence to the principle of least privilege above all else.**
-
-    Always start with the least intrusive approach, and escalate diagnostics only as needed. If critical information is missing or high uncertainty exists, prioritize further information gathering or select the safest, least impactful course of action. Log any such decisions and their rationale extensively. The primary goal is to resolve the issue correctly and safely.
-
+    5. You CANNOT ask for confirmation or clarification or request any other user input. You are expected to operate independently and autonomously until the issues are fixed.
   tools:
   - mcpServer:
       toolNames:
 
@@ -11,5 +11,4 @@ spec:
       command: npx
       env:
         CONTEXT: kind-kagent
-        PATH: /home/rinor/work/explore/kagent/kagent/.venv/bin:/home/rinor/.bun/bin:/home/rinor/go/bin:/home/rinor/.krew/bin:/home/rinor/.gloo/bin:/home/rinor/.gloo-mesh/bin:/home/rinor/.local/bin:/home/rinor/.gloo-mesh/bin:/home/rinor/custom-env/scripts:/home/rinor/.pyenv/plugins/pyenv-virtualenv/shims:/home/rinor/.pyenv/shims:/home/rinor/.krew/bin:/home/rinor/.gloo/bin:/home/rinor/.gloo-mesh/bin:/home/rinor/.go/bin:/home/rinor/.bun/bin:/home/rinor/go/bin:/home/rinor/.local/share/pnpm:/home/rinor/.nvm/versions/node/v23.6.0/bin:/home/rinor/.krew/bin:/home/rinor/.gloo/bin:/home/rinor/.gloo-mesh/bin:/home/rinor/.local/bin:/home/rinor/.gloo-mesh/bin:/home/rinor/custom-env/scripts:/home/rinor/.pyenv/plugins/pyenv-virtualenv/shims:/home/rinor/.pyenv/bin:/home/rinor/.krew/bin:/home/rinor/.gloo/bin:/home/rinor/.gloo-mesh/bin:/home/rinor/.cargo/bin:/home/rinor/.atuin/bin:/home/rinor/.deno/bin:/home/rinor/.local/bin:/home/rinor/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/snap/bin:/home/rinor/.local/share/JetBrains/Toolbox/scripts:~/.local/bin:/usr/local/go/bin:/home/rinor/.vscode/extensions/ms-python.debugpy-2025.8.0-linux-x64/bundled/scripts/noConfigScripts:/home/rinor/.config/Code/User/globalStorage/github.copilot-chat/debugCommand:~/.local/bin:/usr/local/go/bin;/home/rinor/.bun/bin
   description: Check Kubernetes Cluster Fixed
@@ -4,9 +4,9 @@ scenario_dir="$1"
 challenge_file="$2"
 
 # Extract the challenge name and description from the YAML metadata file
-NAME=$(yq eval '.metadata.name' "$challenge_file")
-DESCRIPTION=$(yq eval '.spec.description' "$challenge_file")
-USER_PROMPT=$(yq eval '.spec.prompt' "$challenge_file")
+NAME=$(yq eval '.metadata.name' "${scenario_dir}/${challenge_file}")
+DESCRIPTION=$(yq eval '.spec.description' "${scenario_dir}/${challenge_file}")
+USER_PROMPT=$(yq eval '.spec.prompt' "${scenario_dir}/${challenge_file}")
 
 log() {
   echo "[$(date +'%Y-%m-%dT%H:%M:%S')] $1"
@@ -20,25 +20,32 @@ log "User Prompt: $USER_PROMPT"
 
 
 echo "Waiting for pods to be stable..."
-while kubectl --context ${CLUSTER_CTX} get pods -A | grep ContainerCreating; do sleep 5; done
-while kubectl --context ${CLUSTER_CTX} get pods -A | grep Terminating; do sleep 5; done
+while kubectl --context "${CLUSTER_CTX}" get pods -A | grep ContainerCreating; do sleep 5; done
+while kubectl --context "${CLUSTER_CTX}" get pods -A | grep Terminating; do sleep 5; done
 
 # Test baseline
-timeout --signal=INT 3m mocha ./test.js --timeout 10000 --retries 5
+log "Testing initial cluster state..."
+timeout --signal=INT 3m mocha "${scenario_dir}/test.js" --timeout 10000 --retries 5
+BASELINE_TEST_STATUS=$?
+
+if [ $BASELINE_TEST_STATUS -ne 0 ]; then
+    log "ERROR: Baseline test failed. The cluster is not in the right state to proceed with the challenge."
+    log "Exiting without breaking the environment."
+    exit 1
+fi
 
 # Break the environment by executing commands defined in each step of the challenge
 log "Breaking the environment..."
-STEPS_COUNT=$(yq '.spec.steps | length' "$challenge_file")
+STEPS_COUNT=$(yq '.spec.steps | length' "${scenario_dir}/${challenge_file}")
 for ((i=0; i<$STEPS_COUNT; i++)); do
-    yq ".spec.steps[$i].run" "$challenge_file" | while IFS= read -r cmd; do
-    echo "$cmd" >> "$challenge_file".$i.sh
+    yq ".spec.steps[$i].run" "${scenario_dir}/${challenge_file}" | while IFS= read -r cmd; do
+    echo "$cmd" >> "${scenario_dir}/${challenge_file}".$i.sh
     done
     echo "Waiting for pods to be stable..."
 while kubectl --context ${CLUSTER_CTX} get pods -A | grep ContainerCreating; do sleep 5; done
 while kubectl --context ${CLUSTER_CTX} get pods -A | grep Terminating; do sleep 5; done
 
-####TODO    sh "$challenge_file".$i.sh
-    sh "$challenge_file".$i.sh
+    sh "${scenario_dir}/${challenge_file}".$i.sh
 done
 rm -f "$challenge_file".*.sh
 echo "Waiting for pods to be stable..."
@@ -47,26 +54,26 @@ while kubectl --context ${CLUSTER_CTX} get pods -A | grep Terminating; do sleep
 kubectl --context ${CLUSTER_CTX} get pods -A
 
 log "Testing cluster after breaking..."
-timeout --signal=INT 1m mocha ./test.js --timeout 10000 || true
-
-# Try to fix the broken environment using the Agent Framework (apps/agent-framework) and OpenAI API
+timeout --signal=INT 1m mocha "${scenario_dir}/test.js" --timeout 10000 || true
 
-log "Trying to fix thekagent broken environment using the Agent Framework..."
+# Try to fix the broken environment using Kagent
+log "Trying to fix the kagent broken environment using Kagent..."
 
 # Pipe the output of kagent invoke to the thought log file
-touch $NAME.thought.log
-####TODO echo "$USER_PROMPT" | kagent invoke --agent "k8s-agent" --task - > $NAME.thought.log 2>&1
-timeout --signal=INT 3m bash -c 'echo "$1" | kagent invoke --agent "k8s-agent" --task -' -- "$USER_PROMPT" > $NAME.thought.log 2>&1
+touch "${scenario_dir}/$NAME.thought.log"
+mkdir -p "${scenario_dir}/results"
+
+timeout --signal=INT 3m bash -c 'echo "$1" | kagent invoke --agent "k8s-agent" --task -' -- "$USER_PROMPT" > "${scenario_dir}/$NAME.thought.log" 2>&1
 
 log "Testing cluster after fixing..."
 kubectl --context ${CLUSTER_CTX} get pods -A
-if mocha ./test.js --timeout 10000; then
+if mocha "${scenario_dir}/test.js" --timeout 10000; then
   log "---------------> challenge SUCCESSFUL <------------------"
-  rm -f $NAME.failure
-  cat $NAME.thought.log > results/$NAME.success
+  rm -f "${scenario_dir}/$NAME.failure" || true
+  mv "${scenario_dir}/$NAME.thought.log" "${scenario_dir}/results/$NAME.success"
 else
   log "---------------> challenge FAILED <----------------------"
-  rm -f $NAME.success
-  cat $NAME.thought.log > results/$NAME.failure
+  rm -f "${scenario_dir}/$NAME.success" || true
+  mv "${scenario_dir}/$NAME.thought.log" "${scenario_dir}/results/$NAME.failure"
 fi