# Run Kubernetes Agent Benchmark #11
# Workflow header: display name, trigger, and workflow-wide environment.
name: Run PE Test
# Manual trigger only; no push, pull_request, or schedule triggers are declared.
on:
  workflow_dispatch:
# Workflow-level environment variables.
env:
  # Used as the `kubectl --context` value in the test step.
  CLUSTER_CTX: kind-kagent
  # API keys are read from repository secrets rather than written in this file.
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }}
jobs:
  # One job per matrix entry: build the tooling, set up the scenario, run the
  # challenge script, test the resulting cluster state, and publish a JSON result
  # as a job output named after the matrix entry.
  solve-challenge:
    # One output per matrix entry; the key must equal `matrix.challenges.name`
    # because the "Save result" step writes its output under that name.
    outputs:
      scn1_ch1: ${{ steps.gen_output.outputs.scn1_ch1 }}
      # scn1_ch2: ${{ steps.gen_output.outputs.scn1_ch2 }}
      # scn1_ch3: ${{ steps.gen_output.outputs.scn1_ch3 }}
      # scn1_ch4: ${{ steps.gen_output.outputs.scn1_ch4 }}
      # scn1_ch5: ${{ steps.gen_output.outputs.scn1_ch5 }}
      # scn1_ch6: ${{ steps.gen_output.outputs.scn1_ch6 }}
      # scn1_ch7: ${{ steps.gen_output.outputs.scn1_ch7 }}
      # scn1_ch8: ${{ steps.gen_output.outputs.scn1_ch8 }}
      # scn1_ch9: ${{ steps.gen_output.outputs.scn1_ch9 }}
      # scn1_ch10: ${{ steps.gen_output.outputs.scn1_ch10 }}
      # scn1_ch11: ${{ steps.gen_output.outputs.scn1_ch11 }}
      # scn1_ch12: ${{ steps.gen_output.outputs.scn1_ch12 }}
      # scn1_ch13: ${{ steps.gen_output.outputs.scn1_ch13 }}
      # scn1_ch14: ${{ steps.gen_output.outputs.scn1_ch14 }}
      # scn1_ch15: ${{ steps.gen_output.outputs.scn1_ch15 }}
      # scn1_ch16: ${{ steps.gen_output.outputs.scn1_ch16 }}
      # scn1_ch17: ${{ steps.gen_output.outputs.scn1_ch17 }}
      # scn1_ch18: ${{ steps.gen_output.outputs.scn1_ch18 }}
      # scn1_ch19: ${{ steps.gen_output.outputs.scn1_ch19 }}
      # scn1_ch20: ${{ steps.gen_output.outputs.scn1_ch20 }}
      # scn1_ch21: ${{ steps.gen_output.outputs.scn1_ch21 }}
      # scn1_ch22: ${{ steps.gen_output.outputs.scn1_ch22 }}
      # scn1_ch23: ${{ steps.gen_output.outputs.scn1_ch23 }}
      # scn1_ch24: ${{ steps.gen_output.outputs.scn1_ch24 }}
    name: ${{ matrix.challenges.scenario }}/${{ matrix.challenges.challenge }}
    strategy:
      # Let the remaining matrix entries keep running when one of them fails.
      fail-fast: false
      matrix:
        # name: output key; scenario: directory under .github/data/agent-framework;
        # challenge: YAML file inside that scenario directory.
        challenges:
          - { name: scn1_ch1, scenario: scenario1, challenge: configmap-missing-key.yaml}
          # - { name: scn1_ch2, scenario: scenario1, challenge: configmap-misspelled.yaml}
          # - { name: scn1_ch3, scenario: scenario1, challenge: configmap-readonly.yaml}
          # - { name: scn1_ch4, scenario: scenario1, challenge: deployment-env-mismatch.yaml}
          # - { name: scn1_ch5, scenario: scenario1, challenge: deployment-impossible-pod-affinity.yaml}
          # - { name: scn1_ch6, scenario: scenario1, challenge: deployment-low-resources.yaml}
          # - { name: scn1_ch7, scenario: scenario1, challenge: deployment-pod-affinity-wrong-key.yaml}
          # - { name: scn1_ch8, scenario: scenario1, challenge: deployment-probe-failures.yaml}
          # - { name: scn1_ch9, scenario: scenario1, challenge: deployment-scaled-down.yaml}
          # - { name: scn1_ch10, scenario: scenario1, challenge: network-policy.yaml}
          # - { name: scn1_ch11, scenario: scenario1, challenge: pvc-wrong-accessmode.yaml}
          # - { name: scn1_ch12, scenario: scenario1, challenge: pvc-wrong-storageclass.yaml}
          # - { name: scn1_ch13, scenario: scenario1, challenge: secret-missing.yaml}
          # - { name: scn1_ch14, scenario: scenario1, challenge: secret-not-mounted.yaml}
          # - { name: scn1_ch15, scenario: scenario1, challenge: service-no-endpoint.yaml}
          # - { name: scn1_ch16, scenario: scenario1, challenge: serviceaccount-misspelled.yaml}
          # - { name: scn1_ch17, scenario: scenario1, challenge: serviceaccount-permissions.yaml}
          # - { name: scn1_ch18, scenario: scenario1, challenge: pod-hostport-conflict.yaml}
          # - { name: scn1_ch19, scenario: scenario1, challenge: pod-limit-range-exceeded.yaml}
          # - { name: scn1_ch20, scenario: scenario1, challenge: pod-resource-quota-exceeded.yaml}
          # - { name: scn1_ch21, scenario: scenario1, challenge: service-dns-resolution-fail.yaml}
          # - { name: scn1_ch22, scenario: scenario1, challenge: service-incorrect-port-number.yaml}
          # - { name: scn1_ch23, scenario: scenario1, challenge: missing-service-selector.yaml}
          # - { name: scn1_ch24, scenario: scenario1, challenge: pod-security-context-issue.yaml}
    runs-on: ['ubuntu-latest']
    # Declared once for the whole job instead of being repeated on individual steps.
    env:
      CLUSTER_CTX: kind-kagent
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: 'Build Images'
        env:
          DOCKER_BUILD_ARGS: "--load --platform linux/amd64"
          DOCKER_BUILDER: "docker buildx"
          BUILD_ARGS: "--platform linux/amd64"
        run: |
          bash .github/data/agent-framework/0.setup.sh
          kagent version
      # Print the challenge prompt into the job log (this step was previously listed twice).
      - name: Gather challenge information
        working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
        run: |
          cat "${{ matrix.challenges.challenge }}" | yq .spec.prompt
      # Run the scenario's own setup script, then install the JS dependencies and
      # the mocha runner used by the "Test final state" step.
      - name: Setup scenario
        working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
        timeout-minutes: 3
        run: |
          bash "./run.sh"
          pnpm i || npm i
          npm install -g mocha
      - name: Run Test
        working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
        timeout-minutes: 3
        run: |
          ../run-challenge.sh "${{ matrix.challenges.scenario }}" "${{ matrix.challenges.challenge }}"
      # Runs only when an earlier step has failed. Because it sits before
      # "Test final state", failures in that later step do not trigger it.
      # NOTE(review): this looks like an interactive debug session on a runner whose
      # environment holds the workflow-level API keys; confirm who can connect and
      # whether the session can stall the job.
      - name: Setup upterm session
        if: failure()
        uses: lhotari/action-upterm@v1
      - name: Test final state
        working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
        # Must be longer than the 3m limit given to `timeout` below; the previous
        # 1-minute step limit cut mocha off before that limit (or the configured
        # retries) could take effect.
        timeout-minutes: 4
        run: |
          kubectl --context "${CLUSTER_CTX}" get pods -A
          timeout --signal=INT 3m mocha ./test.js --timeout 10000 --retries 5
      # Always runs, so a result is published for failed challenges too.
      - name: Save result
        working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
        id: gen_output
        if: always()
        run: |
          NAME=$(cat "${{ matrix.challenges.challenge }}" | yq .metadata.name)
          DESCRIPTION=$(cat "${{ matrix.challenges.challenge }}" | yq .spec.description)
          # Build {status, name, description} in one jq call and emit it as a JSON
          # string (`tostring`), which keeps the output value on a single line.
          job_outcome=$(jq -n \
            --arg status "${{ job.status }}" \
            --arg name "$NAME" \
            --arg description "$DESCRIPTION" \
            '{status: $status, name: $name, description: $description} | tostring')
          echo "${{ matrix.challenges.name }}=${job_outcome}" >> "$GITHUB_OUTPUT"
  # Collects every solve-challenge output, renders it as YAML, and prints the
  # number of successes and failures. Runs even when challenges failed.
  summarize-results:
    needs: [solve-challenge]
    if: always()
    runs-on: ['ubuntu-latest']
    steps:
      - name: Summarize results
        run: |
          # Each output value is a JSON string that is encoded again by toJSON(),
          # hence the double `fromjson` to get back to the result object.
          cat << 'EOF' | jq 'with_entries(.value |= (fromjson | fromjson) | .value |= del(.completion_tokens_details) | .value |= del(.prompt_tokens_details))' | yq eval -P > out.yaml
          ${{ toJSON(needs.solve-challenge.outputs) }}
          EOF
          cat out.yaml
          echo "success: $(grep -c "status: success" out.yaml)"
          echo "failures: $(grep -c "status: failure" out.yaml)"