# Run Kubernetes Agent Benchmark #8
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
---
# Workflow header: display name, trigger, and environment shared by all jobs.
name: Run PE Test

# Started by hand only (Actions "Run workflow" button or the API); no inputs.
on: workflow_dispatch

# Workflow-level variables are exported to every step of every job.
env:
  # Passed to `kubectl --context` by the steps that inspect the cluster.
  CLUSTER_CTX: kind-kagent
  # API keys are read from the Actions secrets store, never written inline.
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }}

jobs:
  # Runs one job per enabled matrix entry: builds the images, sets up the
  # scenario, runs the challenge, checks the final cluster state with mocha,
  # and publishes a small JSON result as a job output.
  solve-challenge:
    name: ${{ matrix.challenges.scenario }}/${{ matrix.challenges.challenge }}
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining challenges finish even if one of them fails.
      fail-fast: false
      matrix:
        challenges:
          - {name: scn1_ch1, scenario: scenario1, challenge: configmap-missing-key.yaml}
          # - {name: scn1_ch2, scenario: scenario1, challenge: configmap-misspelled.yaml}
          # - {name: scn1_ch3, scenario: scenario1, challenge: configmap-readonly.yaml}
          # - {name: scn1_ch4, scenario: scenario1, challenge: deployment-env-mismatch.yaml}
          # - {name: scn1_ch5, scenario: scenario1, challenge: deployment-impossible-pod-affinity.yaml}
          # - {name: scn1_ch6, scenario: scenario1, challenge: deployment-low-resources.yaml}
          # - {name: scn1_ch7, scenario: scenario1, challenge: deployment-pod-affinity-wrong-key.yaml}
          # - {name: scn1_ch8, scenario: scenario1, challenge: deployment-probe-failures.yaml}
          # - {name: scn1_ch9, scenario: scenario1, challenge: deployment-scaled-down.yaml}
          # - {name: scn1_ch10, scenario: scenario1, challenge: network-policy.yaml}
          # - {name: scn1_ch11, scenario: scenario1, challenge: pvc-wrong-accessmode.yaml}
          # - {name: scn1_ch12, scenario: scenario1, challenge: pvc-wrong-storageclass.yaml}
          # - {name: scn1_ch13, scenario: scenario1, challenge: secret-missing.yaml}
          # - {name: scn1_ch14, scenario: scenario1, challenge: secret-not-mounted.yaml}
          # - {name: scn1_ch15, scenario: scenario1, challenge: service-no-endpoint.yaml}
          # - {name: scn1_ch16, scenario: scenario1, challenge: serviceaccount-misspelled.yaml}
          # - {name: scn1_ch17, scenario: scenario1, challenge: serviceaccount-permissions.yaml}
          # - {name: scn1_ch18, scenario: scenario1, challenge: pod-hostport-conflict.yaml}
          # - {name: scn1_ch19, scenario: scenario1, challenge: pod-limit-range-exceeded.yaml}
          # - {name: scn1_ch20, scenario: scenario1, challenge: pod-resource-quota-exceeded.yaml}
          # - {name: scn1_ch21, scenario: scenario1, challenge: service-dns-resolution-fail.yaml}
          # - {name: scn1_ch22, scenario: scenario1, challenge: service-incorrect-port-number.yaml}
          # - {name: scn1_ch23, scenario: scenario1, challenge: missing-service-selector.yaml}
          # - {name: scn1_ch24, scenario: scenario1, challenge: pod-security-context-issue.yaml}
    # Each matrix entry writes its result under its own key (the entry's
    # `name`), so every enabled matrix entry needs a matching line here.
    outputs:
      scn1_ch1: ${{ steps.gen_output.outputs.scn1_ch1 }}
      # scn1_ch2: ${{ steps.gen_output.outputs.scn1_ch2 }}
      # scn1_ch3: ${{ steps.gen_output.outputs.scn1_ch3 }}
      # scn1_ch4: ${{ steps.gen_output.outputs.scn1_ch4 }}
      # scn1_ch5: ${{ steps.gen_output.outputs.scn1_ch5 }}
      # scn1_ch6: ${{ steps.gen_output.outputs.scn1_ch6 }}
      # scn1_ch7: ${{ steps.gen_output.outputs.scn1_ch7 }}
      # scn1_ch8: ${{ steps.gen_output.outputs.scn1_ch8 }}
      # scn1_ch9: ${{ steps.gen_output.outputs.scn1_ch9 }}
      # scn1_ch10: ${{ steps.gen_output.outputs.scn1_ch10 }}
      # scn1_ch11: ${{ steps.gen_output.outputs.scn1_ch11 }}
      # scn1_ch12: ${{ steps.gen_output.outputs.scn1_ch12 }}
      # scn1_ch13: ${{ steps.gen_output.outputs.scn1_ch13 }}
      # scn1_ch14: ${{ steps.gen_output.outputs.scn1_ch14 }}
      # scn1_ch15: ${{ steps.gen_output.outputs.scn1_ch15 }}
      # scn1_ch16: ${{ steps.gen_output.outputs.scn1_ch16 }}
      # scn1_ch17: ${{ steps.gen_output.outputs.scn1_ch17 }}
      # scn1_ch18: ${{ steps.gen_output.outputs.scn1_ch18 }}
      # scn1_ch19: ${{ steps.gen_output.outputs.scn1_ch19 }}
      # scn1_ch20: ${{ steps.gen_output.outputs.scn1_ch20 }}
      # scn1_ch21: ${{ steps.gen_output.outputs.scn1_ch21 }}
      # scn1_ch22: ${{ steps.gen_output.outputs.scn1_ch22 }}
      # scn1_ch23: ${{ steps.gen_output.outputs.scn1_ch23 }}
      # scn1_ch24: ${{ steps.gen_output.outputs.scn1_ch24 }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build Images
        env:
          DOCKER_BUILD_ARGS: "--load --platform linux/amd64"
          DOCKER_BUILDER: "docker buildx"
          BUILD_ARGS: "--platform linux/amd64"
        run: bash .github/data/agent-framework/0.setup.sh

      # Prints the challenge prompt into the job log. Matrix values are handed
      # to the shell through env vars rather than spliced into the script text.
      - name: Gather challenge information
        working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
        env:
          MATRIX_CHALLENGE: ${{ matrix.challenges.challenge }}
        run: yq '.spec.prompt' "$MATRIX_CHALLENGE"

      - name: Setup scenario
        working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
        timeout-minutes: 3
        run: bash ./run.sh

      - name: Run Test
        working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
        timeout-minutes: 3
        env:
          MATRIX_SCENARIO: ${{ matrix.challenges.scenario }}
          MATRIX_CHALLENGE: ${{ matrix.challenges.challenge }}
        run: ../run-challenge.sh "$MATRIX_SCENARIO" "$MATRIX_CHALLENGE"

      - name: Test final state
        working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
        # Must exceed the 3m `timeout` below; with a shorter step limit the
        # runner kills the step before `timeout` can interrupt mocha.
        timeout-minutes: 4
        run: |
          # CLUSTER_CTX is inherited from the workflow-level env.
          kubectl --context "${CLUSTER_CTX}" get pods -A
          timeout --signal=INT 3m mocha ./test.js --timeout 10000 --retries 5

      # Always runs, so failed challenges are reported too. The output value is
      # a JSON string that itself contains JSON; the summary job decodes it
      # with two `fromjson` calls.
      - name: Save result
        id: gen_output
        if: always()
        working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
        env:
          MATRIX_NAME: ${{ matrix.challenges.name }}
          MATRIX_CHALLENGE: ${{ matrix.challenges.challenge }}
          JOB_STATUS: ${{ job.status }}
        run: |
          # Best effort: still record the job status when the challenge file
          # cannot be read.
          challenge_name=$(yq '.metadata.name' "$MATRIX_CHALLENGE" || true)
          challenge_description=$(yq '.spec.description' "$MATRIX_CHALLENGE" || true)
          job_outcome=$(jq -n \
            --arg status "$JOB_STATUS" \
            --arg name "$challenge_name" \
            --arg description "$challenge_description" \
            '{status: $status, name: $name, description: $description} | tostring')
          echo "${MATRIX_NAME}=${job_outcome}" >> "$GITHUB_OUTPUT"

  # Collects the per-challenge results published by solve-challenge, prints
  # them as YAML, and counts successes and failures.
  summarize-results:
    needs: [solve-challenge]
    # Summarize even when some (or all) challenges failed.
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Summarize results
        env:
          # Passed through the environment so the JSON is never parsed as
          # shell script text.
          RESULTS_JSON: ${{ toJSON(needs.solve-challenge.outputs) }}
        run: |
          printf '%s\n' "$RESULTS_JSON" \
            | jq 'with_entries(.value |= (fromjson | fromjson | del(.completion_tokens_details, .prompt_tokens_details)))' \
            | yq eval -P > out.yaml
          cat out.yaml
          echo "success: $(grep -c 'status: success' out.yaml)"
          echo "failures: $(grep -c 'status: failure' out.yaml)"