Skip to content

Commit 91ad6ac

Browse files
committed
GHA evals
1 parent 19108f7 commit 91ad6ac

File tree

3 files changed

+584
-0
lines changed

3 files changed

+584
-0
lines changed
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
# Evals Quick Test
#
# Manually triggered smoke test for the Docker Compose stack in packages/evals.
# Job `test-docker-compose` builds the images, starts the `server` profile and
# checks container-to-container networking; job `validate-compose-file` checks
# the compose configuration without starting anything.
name: Evals Quick Test

on:
  workflow_dispatch:

# Least privilege: every job in this workflow only reads the repository.
permissions:
  contents: read

env:
  # Environment values are strings; quote them so no YAML parser retypes them.
  DOCKER_BUILDKIT: '1'
  COMPOSE_DOCKER_CLI_BUILD: '1'

defaults:
  run:
    # Naming the shell makes GitHub run steps with
    # `bash --noprofile --norc -eo pipefail {0}`. The implicit default is
    # `bash -e {0}` (no pipefail), under which `docker compose ... | sort`
    # reports success even when `docker compose` itself failed.
    shell: bash
    # Every run step operates on the evals package.
    working-directory: packages/evals

jobs:
  test-docker-compose:
    name: Test Docker Compose Networking
    runs-on: ubuntu-latest
    timeout-minutes: 15

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Create test environment
        run: |
          # Create minimal test environment.
          # The delimiter is quoted so the shell copies the body verbatim.
          cat > .env.test << 'EOF'
          NODE_ENV=test
          DATABASE_URL=postgresql://postgres:password@db:5432/evals_test
          REDIS_URL=redis://redis:6379
          HOST_EXECUTION_METHOD=docker
          EOF

      - name: Build images
        run: docker compose build web runner

      - name: Start server services
        run: docker compose --profile server up -d

      - name: Test service connectivity
        run: |
          # Wait for services
          echo "Waiting for PostgreSQL..."
          timeout 60 bash -c 'until docker compose exec -T db pg_isready -U postgres; do sleep 2; done'

          echo "Waiting for Redis..."
          timeout 60 bash -c 'until docker compose exec -T redis redis-cli ping | grep -q PONG; do sleep 2; done'

          # Test inter-container networking
          echo "Testing database connection from web container..."
          docker compose exec -T web sh -c 'nc -z db 5432 && echo "✓ Database connection successful"'

          echo "Testing Redis connection from web container..."
          docker compose exec -T web sh -c 'nc -z redis 6379 && echo "✓ Redis connection successful"'

          # Test that web service can start (basic health check).
          # Deliberately best-effort: a timeout only prints a notice.
          # `-s -o /dev/null` keeps the response body out of the job log.
          echo "Testing web service startup..."
          timeout 30 bash -c 'until curl -fs -o /dev/null http://localhost:3000 || curl -fs -o /dev/null http://localhost:3000/health; do sleep 2; done' \
            || echo "Web service may not have health endpoint, continuing..."

      - name: Test runner container networking
        run: |
          echo "Testing runner container can connect to services..."
          docker compose run --rm runner sh -c 'nc -z db 5432 && echo "✓ Runner -> Database connection successful"'
          docker compose run --rm runner sh -c 'nc -z redis 6379 && echo "✓ Runner -> Redis connection successful"'
          docker compose run --rm runner sh -c 'nc -z web 3000 && echo "✓ Runner -> Web service connection successful"'

      - name: Verify Docker socket access
        run: |
          echo "Testing Docker socket access in runner..."
          docker compose run --rm runner docker --version
          docker compose run --rm runner docker ps

      - name: Show service status
        if: always()
        run: |
          echo "=== Service Status ==="
          docker compose ps

          echo "=== Network Information ==="
          docker network ls | grep evals || echo "No evals network found"

          echo "=== Container Information ==="
          # This step also runs after failures, when the containers may never
          # have started; keep the diagnostics best-effort so they do not add
          # a second, misleading error on top of the real one.
          docker compose exec -T db sh -c 'echo "Database container hostname: $(hostname)"' \
            || echo "db container not available"
          docker compose exec -T redis sh -c 'echo "Redis container hostname: $(hostname)"' \
            || echo "redis container not available"

      - name: Cleanup
        if: always()
        run: docker compose down -v --remove-orphans

  validate-compose-file:
    name: Validate Compose Configuration
    runs-on: ubuntu-latest
    # Static checks only; fail fast instead of inheriting the 6-hour default.
    timeout-minutes: 10

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Validate Docker Compose syntax
        run: docker compose config --quiet

      - name: Check service definitions
        run: |
          # Verify all expected services are defined.
          # `--profile "*"` enables every profile for the listing.
          # NOTE(review): the other steps start services via `--profile server`
          # / `--profile runner`, so they are presumably profile-gated and would
          # be missing from a profile-less listing - confirm against the
          # compose file.
          services=$(docker compose --profile "*" config --services | sort)
          expected_services="db redis runner web"

          echo "Defined services: $services"
          echo "Expected services: $expected_services"

          for service in $expected_services; do
            # Whole-line match read from a here-string: no pipeline, so
            # pipefail cannot turn an early `grep -q` exit into a false miss.
            if ! grep -qx -- "$service" <<< "$services"; then
              echo "ERROR: Service '$service' not found"
              exit 1
            fi
          done

          echo "✓ All expected services found"

      - name: Check profiles
        run: |
          # Test profile configurations. With pipefail active, a compose error
          # in either listing now fails the step instead of being masked by
          # `sort`.
          echo "Testing server profile..."
          docker compose --profile server config --services | sort

          echo "Testing runner profile..."
          docker compose --profile runner config --services | sort

          echo "✓ Profiles validated"

.github/workflows/evals.yml

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
# Evals Docker Compose
#
# Manually triggered pipeline for packages/evals:
#   build-and-test          build the images, start the stack, push the DB
#                           schema and run the runner's test script
#   run-sample-evals        opt-in (input `run_full_evals`): run a small
#                           evaluation batch and upload results/logs
#   security-scan           Trivy filesystem scan uploaded as SARIF
#   docker-compose-validate static validation of the compose configuration
name: Evals Docker Compose

on:
  workflow_dispatch:
    inputs:
      run_full_evals:
        description: 'Run full evaluation suite'
        required: false
        # A boolean input takes a boolean default, not the string 'false'.
        default: false
        type: boolean
      concurrency:
        description: 'Evaluation concurrency level'
        required: false
        default: '2'
        type: string

# Least privilege by default; jobs that need more declare it themselves.
permissions:
  contents: read

env:
  # Environment values are strings; quote them so no YAML parser retypes them.
  DOCKER_BUILDKIT: '1'
  COMPOSE_DOCKER_CLI_BUILD: '1'

defaults:
  run:
    # Naming the shell makes GitHub run steps with
    # `bash --noprofile --norc -eo pipefail {0}`. The implicit default is
    # `bash -e {0}` (no pipefail), which hides failures on the left side of
    # a pipe.
    shell: bash
    # Every run step operates on the evals package.
    working-directory: packages/evals

jobs:
  build-and-test:
    name: Build and Test Evals
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Create environment files
        run: |
          # Delimiters are quoted so the shell copies the bodies verbatim.

          # Create .env.test for testing
          cat > .env.test << 'EOF'
          NODE_ENV=test
          DATABASE_URL=postgresql://postgres:password@db:5432/evals_test
          REDIS_URL=redis://redis:6379
          HOST_EXECUTION_METHOD=docker
          EOF

          # Create .env.development for development
          cat > .env.development << 'EOF'
          NODE_ENV=development
          DATABASE_URL=postgresql://postgres:password@db:5432/evals_development
          REDIS_URL=redis://redis:6379
          HOST_EXECUTION_METHOD=docker
          EOF

      - name: Build Docker images
        run: docker compose build web runner

      - name: Start server services
        run: docker compose --profile server up -d

      - name: Wait for services to be ready
        run: |
          # Wait for PostgreSQL to be ready
          echo "Waiting for PostgreSQL..."
          timeout 60 bash -c 'until docker compose exec -T db pg_isready -U postgres -d evals_development; do sleep 2; done'

          # Wait for Redis to be ready
          echo "Waiting for Redis..."
          timeout 60 bash -c 'until docker compose exec -T redis redis-cli ping | grep -q PONG; do sleep 2; done'

          # Wait for web service to be ready.
          # `-s -o /dev/null` keeps the response body out of the job log.
          echo "Waiting for web service..."
          timeout 60 bash -c 'until curl -fs -o /dev/null http://localhost:3000/health || curl -fs -o /dev/null http://localhost:3000; do sleep 2; done'

      - name: Run database migrations
        run: docker compose exec -T web pnpm db:push

      - name: Run tests
        run: docker compose run --rm runner pnpm _test

      - name: Check service logs on failure
        if: failure()
        run: |
          echo "=== Database logs ==="
          docker compose logs db
          echo "=== Redis logs ==="
          docker compose logs redis
          echo "=== Web service logs ==="
          docker compose logs web

      - name: Cleanup
        if: always()
        run: docker compose down -v --remove-orphans

  run-sample-evals:
    name: Run Sample Evaluations
    runs-on: ubuntu-latest
    needs: build-and-test
    # The original condition also OR-ed `github.event_name ==
    # 'workflow_dispatch'`, which is always true for this dispatch-only
    # workflow and made the `run_full_evals` input a no-op. The job now runs
    # only when the input is ticked (it spends OPENROUTER_API_KEY credit).
    if: github.event.inputs.run_full_evals == 'true'
    timeout-minutes: 60

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Create environment files
        env:
          # Hand the secret to the script through the environment rather than
          # pasting `${{ ... }}` into the script text, where the shell would
          # re-interpret any `$`, backtick or quote the secret contains.
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
        run: |
          printf 'OPENROUTER_API_KEY=%s\n' "$OPENROUTER_API_KEY" > .env.local

          cat > .env.development << 'EOF'
          NODE_ENV=development
          DATABASE_URL=postgresql://postgres:password@db:5432/evals_development
          REDIS_URL=redis://redis:6379
          HOST_EXECUTION_METHOD=docker
          EOF

      - name: Build and start services
        # `--scale runner=0`: no runner container is started here; the runner
        # is launched on demand with `docker compose run` further down.
        run: docker compose --profile server --profile runner up --build -d --scale runner=0

      - name: Wait for services
        run: |
          timeout 120 bash -c 'until docker compose exec -T db pg_isready -U postgres -d evals_development; do sleep 2; done'
          timeout 60 bash -c 'until docker compose exec -T redis redis-cli ping | grep -q PONG; do sleep 2; done'
          timeout 60 bash -c 'until curl -fs -o /dev/null http://localhost:3000; do sleep 2; done'

      - name: Run database setup
        run: docker compose exec -T web pnpm db:push

      - name: Run sample evaluation
        env:
          CONCURRENCY: ${{ github.event.inputs.concurrency || '2' }}
        run: |
          # CONCURRENCY is free-form user input: require a positive integer
          # and always quote it so it stays a single argument.
          if ! [[ "$CONCURRENCY" =~ ^[1-9][0-9]*$ ]]; then
            echo "ERROR: concurrency must be a positive integer, got '$CONCURRENCY'"
            exit 1
          fi

          # Run a limited set of evaluations for CI
          docker compose run --rm runner pnpm cli run \
            --concurrency "$CONCURRENCY" \
            --timeout 300 \
            --max-exercises 3 \
            --model "anthropic/claude-3-5-sonnet-20241022"

      - name: Upload evaluation results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: evaluation-results
          path: |
            packages/evals/results/
            packages/evals/logs/
          retention-days: 7

      - name: Cleanup
        if: always()
        run: docker compose down -v --remove-orphans

  security-scan:
    name: Security Scan
    runs-on: ubuntu-latest
    needs: build-and-test
    timeout-minutes: 15
    permissions:
      contents: read
      # Required by github/codeql-action/upload-sarif to publish results.
      security-events: write
      # Required by upload-sarif when the repository is private.
      actions: read

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Run Trivy vulnerability scanner
        # Pinned to a release instead of the mutable `master` branch so the
        # scanner code cannot change underneath this workflow.
        # NOTE(review): confirm the tag; a full commit SHA is stricter still.
        uses: aquasecurity/trivy-action@0.28.0
        with:
          scan-type: 'fs'
          scan-ref: 'packages/evals'
          format: 'sarif'
          output: 'trivy-results.sarif'

      - name: Upload Trivy scan results
        uses: github/codeql-action/upload-sarif@v3
        if: always()
        with:
          sarif_file: 'trivy-results.sarif'

  docker-compose-validate:
    name: Validate Docker Compose
    runs-on: ubuntu-latest
    # Static checks only; fail fast instead of inheriting the 6-hour default.
    timeout-minutes: 10

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Validate Docker Compose file
        run: docker compose config --quiet

      - name: Check Docker Compose services
        run: |
          # `--profile "*"` enables every profile for the listing.
          # NOTE(review): the other jobs start services via `--profile server`
          # / `--profile runner`, so they are presumably profile-gated and would
          # be missing from a profile-less listing - confirm against the
          # compose file.
          docker compose --profile "*" config --services | sort > services.txt
          echo "Available services:"
          cat services.txt

          # Verify expected services exist
          for service in db redis web runner; do
            if ! grep -q "^$service$" services.txt; then
              echo "ERROR: Service '$service' not found in docker-compose.yml"
              exit 1
            fi
          done

          echo "All expected services found ✓"

0 commit comments

Comments
 (0)