Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .dvc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/config.local
/tmp
/cache
4 changes: 4 additions & 0 deletions .dvc/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[core]
remote = gcs
['remote "gcs"']
url = gs://airr-modelplane-dev-dvc/modelplane
3 changes: 3 additions & 0 deletions .dvcignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
4 changes: 4 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ MLFLOW_ARTIFACT_DESTINATION=./mlruns
# Google Storage
# MLFLOW_ARTIFACT_DESTINATION=gs://bucket/path
# GOOGLE_CLOUD_PROJECT=google-project-id
# Needed for both cloud artifacts and DVC support
# GOOGLE_CREDENTIALS_PATH=~/.config/gcloud/application_default_credentials.json

# AWS S3
Expand All @@ -32,3 +33,6 @@ MLFLOW_ARTIFACT_DESTINATION=./mlruns

# this path is relative to where jupyter is started
MODEL_SECRETS_PATH=./config/secrets.toml

# Used by the mock vllm server to authenticate requests
VLLM_API_KEY=changeme
16 changes: 13 additions & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ on:
branches:
- main
pull_request:
workflow_dispatch:
inputs:
branch:
description: 'Branch'
required: true
default: main

jobs:
cli-test:
Expand All @@ -13,6 +19,8 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
ref: ${{ github.event.inputs.branch || github.head_ref || github.ref_name }}

- name: Set up Python
uses: actions/setup-python@v4
Expand All @@ -21,7 +29,7 @@ jobs:

- name: Start MLflow server (no jupyter)
run: |
./start_services.sh no-jupyter -d
./start_services.sh --no-jupyter -d

- name: Install poetry
run: pipx install "poetry == 1.8.5"
Expand All @@ -47,15 +55,17 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
ref: ${{ github.event.inputs.branch || github.head_ref || github.ref_name }}

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.12"

- name: Start MLflow server
- name: Start MLflow server with jupyter and vllm
run: |
./start_services.sh -d
./start_services.sh -d --vllm

- name: Copy test script to Jupyter container
run: |
Expand Down
10 changes: 8 additions & 2 deletions Dockerfile.jupyter
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@ ENV USE_PRIVATE_MODELBENCH=${USE_PRIVATE_MODELBENCH}
# Used for the notebook server
WORKDIR /app

RUN apt-get update && apt-get install -y pipx openssh-client && \
# pipx needed for poetry installation
# ssh client needed for installing private modelbench dependencies
# git needed for dvc
RUN apt-get update && apt-get install -y pipx openssh-client git && \
pipx install poetry
COPY pyproject.toml poetry.lock README.md ./

RUN mkdir -p /root/.ssh && chmod 700 /root/.ssh
RUN git config --global core.sshCommand "ssh -o UserKnownHostsFile=/root/.ssh/known_hosts -o ForwardAgent=yes"
RUN ssh-keyscan github.com > /root/.ssh/known_hosts

# conditionally forward ssh key to install private dependencies
RUN --mount=type=ssh if [ "$USE_PRIVATE_MODELBENCH" = "true" ]; then \
ssh-keyscan github.com > /etc/ssh/ssh_known_hosts; \
poetry install --no-interaction --no-ansi --no-root --extras modelbench-private; \
else \
poetry install --no-interaction --no-ansi --no-root; \
Expand Down
12 changes: 12 additions & 0 deletions Dockerfile.mockvllm
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Image for the mock vLLM server run by the `vllm` service in docker-compose.yaml.
FROM python:3.12-slim

# Defaults so the image builds and runs on its own; docker-compose overrides
# them through the service's `environment` section.
ENV VLLM_HOST=0.0.0.0 \
    VLLM_PORT=8001

WORKDIR /app

# curl is used by the docker-compose healthcheck for this service and is not
# installed by anything else in this image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl && \
    rm -rf /var/lib/apt/lists/*

# versions chosen to match what's in poetry.lock as of 2025-07-09
RUN pip install --no-cache-dir fastapi==0.115.12 uvicorn==0.34.3

# Copied after the dependency install so editing the script does not
# invalidate the cached pip layer.
COPY tests/notebooks/mock_vllm_server.py .

# Resolved at build time from the ENV default above.
EXPOSE ${VLLM_PORT}

# `sh -c` is needed so the env vars are expanded at container start; `exec`
# replaces the shell so uvicorn receives stop signals directly.
CMD ["sh", "-c", "exec uvicorn mock_vllm_server:app --host $VLLM_HOST --port $VLLM_PORT"]
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@

Develop new evaluators / annotators.

## ⚠️ Content warning

The sample datasets provided in the [`flightpaths/data`](https://github.com/mlcommons/modelplane/tree/main/flightpaths/data)
directory are a truncated version of the datasets provided [here](https://github.com/mlcommons/ailuminate).
These data come with the following warning:

>This dataset was created to elicit hazardous responses. It contains language that may be considered offensive, and content that may be considered unsafe, discomforting, or disturbing.
>Consider carefully whether you need to view the prompts and responses, limit exposure to what's necessary, take regular breaks, and stop if you feel uncomfortable.
>For more information on the risks, see [this literature review](https://www.zevohealth.com/wp-content/uploads/2024/07/lit_review_IN-1.pdf) on vicarious trauma.

## Get Started

You must have docker installed on your system. The
Expand Down Expand Up @@ -29,7 +39,7 @@ given docker-compose.yaml file will start up:
```
If you are using the CLI only, and not using Jupyter, you must pass the `--no-jupyter` option:
```bash
./start_services.sh -d no-jupyter
./start_services.sh -d --no-jupyter
```
1. Visit the [Jupyter Server](http://localhost:8888/?token=changeme). The
token is configured in the .env file. You shouldn't need to enter it
Expand Down
25 changes: 25 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,37 @@ services:
USE_PRIVATE_MODELBENCH: ${USE_PRIVATE_MODELBENCH}
JUPYTER_TOKEN: ${JUPYTER_TOKEN}
GIT_PYTHON_REFRESH: ${GIT_PYTHON_REFRESH}
VLLM_API_KEY: ${VLLM_API_KEY}
# Below env needed for dvc (via git) support (backed by GCP)
# SSH_AUTH_SOCK: /ssh-agent
# GOOGLE_APPLICATION_CREDENTIALS: /creds/gcp-key.json
ports:
- "8888:8888"
volumes:
- ./flightpaths:/app/flightpaths
# Volume not needed if using cloud storage for artifacts
- ./mlruns:/mlruns
# Below needed for dvc (via git) support (backed by GCP)
# - ${SSH_AUTH_SOCK:-/dev/null}:/ssh-agent
# - ${GOOGLE_CREDENTIALS_PATH:-/dev/null}:/creds/gcp-key.json:ro

# Runs a dummy docker container to mock a vLLM server
# The image is built from Dockerfile.mockvllm with the repository root as
# build context.
vllm:
build:
context: .
dockerfile: Dockerfile.mockvllm
# Settings passed into the container; VLLM_API_KEY is taken from the
# environment / .env file, the others are fixed values.
environment:
VLLM_MODEL: mlc/not-real-model
VLLM_HOST: 0.0.0.0
VLLM_PORT: 8001
VLLM_API_KEY: ${VLLM_API_KEY}
# Publishes container port 8001 on host port 8001 (same value as VLLM_PORT).
ports:
- "8001:8001"
# Polls /health inside the container every 10s with a 5s timeout, allowing
# up to 10 retries.
# NOTE(review): this requires `curl` to exist in the image — confirm that
# Dockerfile.mockvllm installs it, otherwise the check can never pass.
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
interval: 10s
timeout: 5s
retries: 10

volumes:
pgdata:
14 changes: 14 additions & 0 deletions flightpaths/config/secrets.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Edit this file to add your secrets.

# This is an example of how to define a secret.
# The config is saying that within scope "vllm" we have a
# key named "api_key" that we are setting to value "changeme".
[vllm]
api_key = "changeme"

# Here are some commonly needed keys you can uncomment and use.
[together]
# api_key = "fake key"

[perspective_api]
# api_key = "<your key here>"
1 change: 1 addition & 0 deletions flightpaths/data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/demo_prompts_mini.csv
5 changes: 5 additions & 0 deletions flightpaths/data/demo_prompts_mini.csv.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
outs:
- md5: 22fbc36cf0afa5428086fc53dd182ee4
size: 24779
hash: md5
path: demo_prompts_mini.csv
Loading