Skip to content

Commit f56341d

Browse files
authored
Olmo-eval cloudrun db connection (#570)
Adds logic to use PGPASSWORD, or to fetch the DB password from AWS Secrets Manager, based on the presence of the DB_SECRET_ARN env var. Also adds VPC direct egress settings, so evaluation jobs egress on a static IP or range. The static IP was set up in the project separately, following https://docs.cloud.google.com/run/docs/configuring/static-outbound-ip. Also was able to bring the app back into the root uv workspaces. The initial reason for keeping it out was to avoid dependency and Python conflicts with the other apps. Now that the Flask API is gone, I was able to make the olmo-eval-internal app a dependency only for the Docker image and include the evaluations repo back into the root uv.
1 parent cf5ce37 commit f56341d

File tree

16 files changed

+227
-2431
lines changed

16 files changed

+227
-2431
lines changed

.github/workflows/build-and-push-evals.yml

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,6 @@ on:
77
paths:
88
- 'apps/evaluations/**'
99
- '.github/workflows/build-and-push-evals.yml'
10-
pull_request:
11-
paths:
12-
- 'apps/evaluations/**'
13-
- '.github/workflows/build-and-push-evals.yml'
1410
workflow_dispatch:
1511

1612
permissions:
@@ -23,21 +19,7 @@ env:
2319
REPO: model-evals
2420

2521
jobs:
26-
test:
27-
runs-on: ubuntu-latest
28-
steps:
29-
- uses: actions/checkout@v6
30-
31-
- name: Setup uv
32-
uses: astral-sh/setup-uv@v7
33-
34-
- name: Run Tests
35-
working-directory: apps/evaluations
36-
run: uv run --only-group dev pytest -v
37-
3822
build-and-deploy:
39-
needs: test
40-
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
4123
runs-on: ubuntu-latest
4224
environment:
4325
name: ${{ github.ref_name }}

apps/evaluations/.env.local.example

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ GITHUB_TOKEN=
77
# Required for running evaluations
88
LITELLM_PROXY_API_KEY=
99

10-
# Required for storage (Postgres)
10+
# Required for storage (Postgres, S3)
11+
# Set PGPASSWORD directly, or set DB_SECRET_ARN to fetch the Postgres password from AWS Secrets Manager
1112
PGHOST=
1213
PGPASSWORD=
13-
14-
# Required for storage (S3)
14+
DB_SECRET_ARN=
1515
AWS_ACCESS_KEY_ID=
1616
AWS_SECRET_ACCESS_KEY=
1717

apps/evaluations/Dockerfile

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,14 @@ RUN --mount=type=secret,id=GITHUB_TOKEN \
3737

3838
WORKDIR /app
3939

40-
# Copy evaluations package
40+
# Copy and install requirements (olmo-eval-internal from private repo)
41+
COPY requirements.txt /app/requirements.txt
42+
RUN --mount=type=cache,target=/root/.cache/uv \
43+
uv pip install --system -r /app/requirements.txt
44+
45+
# Copy and install evaluations package
4146
COPY src /app/src
4247
COPY pyproject.toml /app/pyproject.toml
43-
44-
# Install evaluations package (pulls olmo-eval-internal from git)
4548
RUN --mount=type=cache,target=/root/.cache/uv \
4649
uv pip install --system /app
4750

apps/evaluations/README.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,16 @@ gcloud run jobs execute eval --region us-west1 \
273273

274274
## CLI Reference
275275

276-
The `evaluations` CLI is available inside the container:
276+
The `evaluations` CLI requires `olmo-eval-internal`, which is only installed inside the Docker container
277+
(not in the local dev environment). Use `run-local` to run evaluations locally via Docker:
278+
279+
```bash
280+
# Run evaluations locally via Docker (recommended)
281+
uv run run-local --tier smoke --task-index 0
282+
uv run run-local --tier standard --task-index 0 --with-storage
283+
```
284+
285+
Inside the container, you can use the CLI directly:
277286

278287
```bash
279288
# Auto-run tier from env vars (used by Cloud Run)

apps/evaluations/pyproject.toml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ description = "OLMo evaluation runner for Cloud Run Jobs"
55
requires-python = ">=3.12"
66

77
dependencies = [
8-
"olmo-eval-internal[litellm,storage] @ git+https://github.com/allenai/olmo-eval-internal.git@main",
98
"pydantic-settings>=2.0",
9+
"boto3>=1.35",
1010
]
1111

1212
[dependency-groups]
1313
dev = [
1414
"pytest>=8.0",
15-
"pydantic-settings>=2.0",
15+
"boto3-stubs[secretsmanager]>=1.35",
1616
]
1717

1818
[project.scripts]
@@ -24,9 +24,6 @@ run-local = "evaluations.run_local:main"
2424
requires = ["hatchling"]
2525
build-backend = "hatchling.build"
2626

27-
[tool.hatch.metadata]
28-
allow-direct-references = true
29-
3027
[tool.hatch.build.targets.wheel]
3128
packages = ["src/evaluations"]
3229

apps/evaluations/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Runtime dependencies for Docker image (separate from evaluations package)
2+
olmo-eval-internal[litellm,storage] @ git+https://github.com/allenai/olmo-eval-internal.git@main

apps/evaluations/src/evaluations/cli.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,64 @@
11
"""CLI for evaluations Cloud Run Jobs."""
22

33
import argparse
4+
import json
5+
import os
46
import subprocess
57
import sys
68

9+
import boto3
10+
from botocore.exceptions import BotoCoreError, ClientError
11+
712
from evaluations.configs import ModelEval, TierName, get_tier
813
from evaluations.logging import logger
914
from evaluations.settings import settings
1015

1116

17+
def get_aws_secret_value(secret_arn: str, key: str | None = None) -> str:
18+
"""Fetch a secret value from AWS Secrets Manager.
19+
20+
Args:
21+
secret_arn: The ARN of the secret.
22+
key: If provided, parse secret as JSON and extract this key.
23+
24+
Returns:
25+
The secret value (or extracted key value).
26+
"""
27+
region = settings.AWS_REGION
28+
client = boto3.client("secretsmanager", region_name=region)
29+
response = client.get_secret_value(SecretId=secret_arn)
30+
secret_string = response["SecretString"]
31+
32+
if key is None:
33+
return secret_string
34+
35+
return json.loads(secret_string)[key]
36+
37+
38+
def setup_db_credentials() -> None:
39+
"""Set up database credentials from AWS Secrets Manager or settings.
40+
41+
If DB_SECRET_ARN is set, fetches the password from AWS Secrets Manager.
42+
Otherwise, uses PGPASSWORD from settings.
43+
44+
This supports automatically rotating database credentials when using AWS.
45+
"""
46+
if not settings.DB_SECRET_ARN:
47+
# Use PGPASSWORD as set in the environment
48+
return
49+
50+
logger.info("Fetching database password from AWS Secrets Manager")
51+
52+
try:
53+
password = get_aws_secret_value(settings.DB_SECRET_ARN, key="password")
54+
# override PGPASSWORD in the environment
55+
os.environ["PGPASSWORD"] = password
56+
logger.info("Database password loaded from AWS Secrets Manager")
57+
except (BotoCoreError, ClientError, KeyError, json.JSONDecodeError) as e:
58+
logger.error("Failed to fetch database password from AWS: %s", e)
59+
sys.exit(1)
60+
61+
1262
def run_ad_hoc(
1363
model: str,
1464
tasks: str,
@@ -99,6 +149,9 @@ def main() -> None:
99149
2. EVAL_TIER set: Run tier evaluation
100150
3. CLI arguments: Manual invocation
101151
"""
152+
# Fetch database credentials from AWS if DB_SECRET_ARN is set
153+
setup_db_credentials()
154+
102155
# Check for ad-hoc mode
103156
if settings.EVAL_MODE == "ad-hoc":
104157
if not settings.AD_HOC_MODEL or not settings.AD_HOC_TASKS:

apps/evaluations/src/evaluations/run_local.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,13 +89,25 @@ def main() -> int:
8989
"-e",
9090
f"PGUSER={settings.PGUSER}",
9191
"-e",
92-
f"PGPASSWORD={settings.PGPASSWORD}",
93-
"-e",
9492
f"AWS_ACCESS_KEY_ID={settings.AWS_ACCESS_KEY_ID or ''}",
9593
"-e",
9694
f"AWS_SECRET_ACCESS_KEY={settings.AWS_SECRET_ACCESS_KEY or ''}",
9795
])
98-
logger.info("Running: tier=%s, task_index=%d (with storage)", args.tier, args.task_index)
96+
# Use DB_SECRET_ARN if set, otherwise pass PGPASSWORD directly
97+
if settings.DB_SECRET_ARN:
98+
docker_cmd.extend([
99+
"-e",
100+
f"DB_SECRET_ARN={settings.DB_SECRET_ARN}",
101+
"-e",
102+
f"AWS_REGION={settings.AWS_REGION}",
103+
])
104+
logger.info("Running: tier=%s, task_index=%d (with storage, AWS secrets)", args.tier, args.task_index)
105+
elif settings.PGPASSWORD:
106+
docker_cmd.extend(["-e", f"PGPASSWORD={settings.PGPASSWORD}"])
107+
logger.info("Running: tier=%s, task_index=%d (with storage)", args.tier, args.task_index)
108+
else:
109+
logger.warning("Neither DB_SECRET_ARN nor PGPASSWORD is set")
110+
logger.info("Running: tier=%s, task_index=%d (with storage, no password)", args.tier, args.task_index)
99111
else:
100112
docker_cmd.extend(["-e", "LOCAL=true"])
101113
logger.info("Running: tier=%s, task_index=%d (local mode, no storage)", args.tier, args.task_index)

apps/evaluations/src/evaluations/settings.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,13 @@ class Settings(BaseSettings):
1414
PGPORT: str = "5432"
1515
PGDATABASE: str = "olmo_eval"
1616
PGUSER: str = "postgres"
17-
PGPASSWORD: str = "postgres"
17+
PGPASSWORD: str | None = None
1818
DB_SECRET_ARN: str | None = None
1919

20-
# AWS credentials
20+
# AWS credentials and settings
2121
AWS_ACCESS_KEY_ID: str | None = None
2222
AWS_SECRET_ACCESS_KEY: str | None = None
23+
AWS_REGION: str = "us-east-1"
2324

2425
# Settings below are set at runtime and control evaluation behavior. They shouldn't
2526
# be set in .env files, but are included here for validation and documentation.
File renamed without changes.

0 commit comments

Comments
 (0)