Run Kubernetes Agent Benchmark #8

Workflow file for this run

.github/workflows/run-agent-framework-test.yaml at d7ccdb8

	name: Run PE Test

	on:
	workflow_dispatch:
	env:
	CLUSTER_CTX: kind-kagent
	OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
	QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }}

	jobs:
	solve-challenge:
	outputs:
	scn1_ch1: ${{ steps.gen_output.outputs.scn1_ch1 }}
	# scn1_ch2: ${{ steps.gen_output.outputs.scn1_ch2 }}
	# scn1_ch3: ${{ steps.gen_output.outputs.scn1_ch3 }}
	# scn1_ch4: ${{ steps.gen_output.outputs.scn1_ch4 }}
	# scn1_ch5: ${{ steps.gen_output.outputs.scn1_ch5 }}
	# scn1_ch6: ${{ steps.gen_output.outputs.scn1_ch6 }}
	# scn1_ch7: ${{ steps.gen_output.outputs.scn1_ch7 }}
	# scn1_ch8: ${{ steps.gen_output.outputs.scn1_ch8 }}
	# scn1_ch9: ${{ steps.gen_output.outputs.scn1_ch9 }}
	# scn1_ch10: ${{ steps.gen_output.outputs.scn1_ch10 }}
	# scn1_ch11: ${{ steps.gen_output.outputs.scn1_ch11 }}
	# scn1_ch12: ${{ steps.gen_output.outputs.scn1_ch12 }}
	# scn1_ch13: ${{ steps.gen_output.outputs.scn1_ch13 }}
	# scn1_ch14: ${{ steps.gen_output.outputs.scn1_ch14 }}
	# scn1_ch15: ${{ steps.gen_output.outputs.scn1_ch15 }}
	# scn1_ch16: ${{ steps.gen_output.outputs.scn1_ch16 }}
	# scn1_ch17: ${{ steps.gen_output.outputs.scn1_ch17 }}
	# scn1_ch18: ${{ steps.gen_output.outputs.scn1_ch18 }}
	# scn1_ch19: ${{ steps.gen_output.outputs.scn1_ch19 }}
	# scn1_ch20: ${{ steps.gen_output.outputs.scn1_ch20 }}
	# scn1_ch21: ${{ steps.gen_output.outputs.scn1_ch21 }}
	# scn1_ch22: ${{ steps.gen_output.outputs.scn1_ch22 }}
	# scn1_ch23: ${{ steps.gen_output.outputs.scn1_ch23 }}
	# scn1_ch24: ${{ steps.gen_output.outputs.scn1_ch24 }}
	name: ${{ matrix.challenges.scenario }}/${{ matrix.challenges.challenge }}
	strategy:
	fail-fast: false
	matrix:
	challenges:
	- { name: scn1_ch1, scenario: scenario1, challenge: configmap-missing-key.yaml}
	# - { name: scn1_ch2, scenario: scenario1, challenge: configmap-misspelled.yaml}
	# - { name: scn1_ch3, scenario: scenario1, challenge: configmap-readonly.yaml}
	# - { name: scn1_ch4, scenario: scenario1, challenge: deployment-env-mismatch.yaml}
	# - { name: scn1_ch5, scenario: scenario1, challenge: deployment-impossible-pod-affinity.yaml}
	# - { name: scn1_ch6, scenario: scenario1, challenge: deployment-low-resources.yaml}
	# - { name: scn1_ch7, scenario: scenario1, challenge: deployment-pod-affinity-wrong-key.yaml}
	# - { name: scn1_ch8, scenario: scenario1, challenge: deployment-probe-failures.yaml}
	# - { name: scn1_ch9, scenario: scenario1, challenge: deployment-scaled-down.yaml}
	# - { name: scn1_ch10, scenario: scenario1, challenge: network-policy.yaml}
	# - { name: scn1_ch11, scenario: scenario1, challenge: pvc-wrong-accessmode.yaml}
	# - { name: scn1_ch12, scenario: scenario1, challenge: pvc-wrong-storageclass.yaml}
	# - { name: scn1_ch13, scenario: scenario1, challenge: secret-missing.yaml}
	# - { name: scn1_ch14, scenario: scenario1, challenge: secret-not-mounted.yaml}
	# - { name: scn1_ch15, scenario: scenario1, challenge: service-no-endpoint.yaml}
	# - { name: scn1_ch16, scenario: scenario1, challenge: serviceaccount-misspelled.yaml}
	# - { name: scn1_ch17, scenario: scenario1, challenge: serviceaccount-permissions.yaml}
	# - { name: scn1_ch18, scenario: scenario1, challenge: pod-hostport-conflict.yaml}
	# - { name: scn1_ch19, scenario: scenario1, challenge: pod-limit-range-exceeded.yaml}
	# - { name: scn1_ch20, scenario: scenario1, challenge: pod-resource-quota-exceeded.yaml}
	# - { name: scn1_ch21, scenario: scenario1, challenge: service-dns-resolution-fail.yaml}
	# - { name: scn1_ch22, scenario: scenario1, challenge: service-incorrect-port-number.yaml}
	# - { name: scn1_ch23, scenario: scenario1, challenge: missing-service-selector.yaml}
	# - { name: scn1_ch24, scenario: scenario1, challenge: pod-security-context-issue.yaml}
	runs-on: ['ubuntu-latest']

	steps:
	- uses: actions/checkout@v4

	- name: Set up Docker Buildx
	uses: docker/setup-buildx-action@v3

	- name: 'Build Images'
	env:
	DOCKER_BUILD_ARGS: "--load --platform linux/amd64"
	DOCKER_BUILDER: "docker buildx"
	BUILD_ARGS: "--platform linux/amd64"
	run: \|
	bash .github/data/agent-framework/0.setup.sh

	- name: Gather challenge information
	working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
	run: \|
	cat ${{ matrix.challenges.challenge }} \| yq .spec.prompt

	- name: Gather challenge information
	working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
	run: \|
	cat ${{ matrix.challenges.challenge }} \| yq .spec.prompt

	- name: Setup scenario
	working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
	timeout-minutes: 3
	env:
	CLUSTER_CTX: kind-kagent
	run: \|
	bash "./run.sh"

	- name: Run Test
	working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
	timeout-minutes: 3
	env:
	CLUSTER_CTX: kind-kagent
	run: \|
	../run-challenge.sh "${{ matrix.challenges.scenario }}" "${{ matrix.challenges.challenge }}"


	- name: Test final state
	working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
	timeout-minutes: 1
	env:
	CLUSTER_CTX: kind-kagent
	run: \|
	kubectl --context ${CLUSTER_CTX} get pods -A
	timeout --signal=INT 3m mocha ./test.js --timeout 10000 --retries 5

	- name: Save result
	working-directory: .github/data/agent-framework/${{ matrix.challenges.scenario }}
	id: gen_output
	if: always()
	run: \|
	NAME=$(cat ${{ matrix.challenges.challenge }} \| yq .metadata.name)
	DESCRIPTION=$(cat ${{ matrix.challenges.challenge }} \| yq .spec.description)
	job_outcome=$(echo '{}' \| jq '. + {status: "${{ job.status }}"}' \| jq --arg name "$NAME" '. + {name: $name}' \| jq --arg description "$DESCRIPTION" '. + {description: $description}' \| jq tostring)
	echo "${{ matrix.challenges.name }}=${job_outcome}" >> "$GITHUB_OUTPUT"

	summarize-results:
	needs: [solve-challenge]
	if: always()
	runs-on: ['ubuntu-latest']
	steps:
	- name: Summarize results
	run: \|
	cat << 'EOF' \| jq 'with_entries(.value \|= (fromjson \| fromjson) \| .value \|= del(.completion_tokens_details) \| .value \|= del(.prompt_tokens_details))' \| yq eval -P > out.yaml
	${{ toJSON(needs.solve-challenge.outputs) }}
	EOF
	cat out.yaml
	echo "success: $(cat out.yaml\|grep "status: success" \|wc -l)"
	echo "failures: $(cat out.yaml\|grep "status: failure" \|wc -l)"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Run Kubernetes Agent Benchmark #8

Workflow file

Run Kubernetes Agent Benchmark #8

Uh oh!

Workflow file for this run