Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
*.json filter=lfs diff=lfs merge=lfs -text
ultravox/assets/** filter=lfs diff=lfs merge=lfs -text
18 changes: 17 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ jobs:
test:
runs-on: ubuntu-latest

# permissions:
# # Gives the action the necessary permissions for publishing new
# # comments in pull requests.
# pull-requests: write
# # Gives the action the necessary permissions for pushing data to the
# # python-coverage-comment-action branch, and for editing existing
# # comments (to avoid publishing multiple comments in the same PR)
# contents: write

steps:
- uses: actions/checkout@v4
with:
Expand All @@ -37,8 +46,15 @@ jobs:
run: |
just check

- name: Run tests
- name: Run tests and coverage
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
just test-verbose

# - name: Coverage comment
# id: coverage_comment
# uses: py-cov-action/python-coverage-comment-action@v3
# with:
# GITHUB_TOKEN: ${{ github.token }}
# COVERAGE_PATH: ultravox
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ output_audio_processor/
output_tokenizer/

*.csv
*.json
epd_eval/
.git/
env_vars.sh
Expand Down Expand Up @@ -191,3 +190,5 @@ mds_output/
mlruns/
output/

saved_tokenizer/
.run_configs/
29 changes: 19 additions & 10 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,27 @@ install:
poetry install

format:
poetry run autoflake ${PROJECT_DIR} --remove-all-unused-imports --quiet --in-place -r --exclude third_party
poetry run isort ${PROJECT_DIR} --force-single-line-imports
poetry run black ${PROJECT_DIR}
poetry run autoflake {{PROJECT_DIR}} --remove-all-unused-imports --quiet --in-place -r
poetry run isort {{PROJECT_DIR}} --force-single-line-imports
poetry run black {{PROJECT_DIR}}

check:
poetry run black ${PROJECT_DIR} --check
poetry run isort ${PROJECT_DIR} --check --force-single-line-imports
poetry run autoflake ${PROJECT_DIR} --check --quiet --remove-all-unused-imports -r --exclude third_party
poetry run mypy ${PROJECT_DIR}
poetry run black {{PROJECT_DIR}} --check
poetry run isort {{PROJECT_DIR}} --check --force-single-line-imports
poetry run autoflake {{PROJECT_DIR}} --check --quiet --remove-all-unused-imports -r
poetry run mypy {{PROJECT_DIR}}

test *ARGS=".":
cd ${PROJECT_DIR} && poetry run pytest --ignore third_party {{ARGS}}
cd ${PROJECT_DIR} && poetry run coverage run --source=${PROJECT_DIR} -m pytest --ignore third_party {{ARGS}}
just print-coverage

test-verbose *ARGS=".":
cd ${PROJECT_DIR} && poetry run pytest --ignore third_party {{ARGS}} -vv --log-cli-level=INFO {{ARGS}}
cd ${PROJECT_DIR} && poetry run coverage run --source=${PROJECT_DIR} -m pytest --ignore third_party {{ARGS}} -vv --log-cli-level=INFO
just print-coverage

# the following assumes the coverage report is already created by the test command
print-coverage *ARGS:
cd ${PROJECT_DIR} && poetry run coverage report --omit "*_test.py" --sort miss {{ARGS}}

@python *FLAGS:
poetry run python {{FLAGS}}
Expand All @@ -44,7 +50,7 @@ infer *FLAGS:
poetry run python -m ultravox.tools.infer_tool {{FLAGS}}

eval *FLAGS:
poetry run python -m ultravox.tools.eval_tool {{FLAGS}}
poetry run python -m ultravox.evaluation.eval {{FLAGS}}

tts *FLAGS:
poetry run python -m ultravox.tools.ds_tool.ds_tool tts {{FLAGS}}
Expand All @@ -61,6 +67,9 @@ gradio *FLAGS:
run *FLAGS:
poetry run mcli run -f mcloud.yaml --follow {{FLAGS}}

vllm_eval *FLAGS:
poetry run mcli run -f mcloud_eval.yaml --follow {{FLAGS}}

mcloud *FLAGS:
poetry run mcli interactive {{FLAGS}} --cluster ${MCLOUD_CLUSTER} --instance ${MCLOUD_INSTANCE} --name `whoami` --command "bash -c \"$(cat setup.sh)\""

Expand Down
59 changes: 42 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ Ultravox can be trained against any open-weight model. See below for more detail

See Ultravox in action on our [demo page](https://demo.ultravox.ai). You can build your own voice-to-voice agents on our Realtime platform at ultravox.ai.

You can run the Gradio demo locally with `just gradio`. You can run the demo in "voice mode", which allows natural audio conversations with Ultravox, by running `just gradio --voice_mode=True`.

### Discord

Expand Down Expand Up @@ -76,13 +75,28 @@ brew update
brew install just
```

Create a Python virtual environment and install the necessary packages:
It's recommended to use pyenv for managing environments due to the use of Poetry:

```bash
brew install xz
brew install pyenv
pyenv init
pyenv install 3.11
pyenv global 3.11

# Optional
pyenv shell 3.11
```

>**Note**: Use of conda is NOT recommended with Poetry

After creating a virtual environment, install required packages using `just` and `poetry`:

```bash
just install
```

We're using Poetry to manage the Python virtual environment.
We're using Poetry to manage the Python virtual environment. You can observe your environment with `poetry env info`.

### Mosaic Environment Setup (Fixie Internal)

Expand All @@ -103,14 +117,11 @@ mcli set api-key <new-value>
```bash
# Hugging Face token for accessing gated data and models
mcli create secret env HF_TOKEN=hf_<your_token>
mcli create secret env HF_WRITE_TOKEN=hf_<your_token_with_write_access>

# WandB token for logging experiments
mcli create secret env WANDB_PROJECT=ultravox
mcli create secret env WANDB_API_KEY=<your_wandb_key>

# GCP credentials for accessing data (e.g. BoolQ)
# Get service_account.json file from Justin/Farzad and put it in the root dir, then
mcli create secret gcp
```

## Training
Expand Down Expand Up @@ -167,15 +178,24 @@ See [`configs_base.py`](ultravox/training/config_base.py) to find the parameters

### MosaicML Training (Fixie Internal)

Before running any training jobs, you need to setup your SSH key in the Mosaic Platform: https://docs.mosaicml.com/projects/mcli/en/latest/resources/secrets/ssh.html#page-secrets-ssh
Before running any training jobs, set up [SSH authentication with MosaicML](https://docs.mosaicml.com/projects/mcli/en/latest/resources/secrets/ssh.html#page-secrets-ssh):

```bash
## Create a new SSH key and add it to the Mosaic Platform
# ssh-keygen -f ~/.ssh/mclid_id_rsa
## add the **public** key to GitHub
# mcli create secret ssh ~/.ssh/mclid_id_rsa
1. Generate an SSH key:
```bash
ssh-keygen -f ~/.ssh/mclid_id_rsa
```

2. Add the public key to your GitHub account

mcli run -f mcloud.yaml --follow
3. Upload the private key to MosaicML (this allows MosaicML to clone the repository and run jobs):
```bash
mcli create secret git-ssh ~/.ssh/mclid_id_rsa
```

Then you can run the following command to kick off a training job:

```bash
mcli run -f mcloud_train.yaml --follow
```

Other useful commands:
Expand All @@ -187,7 +207,7 @@ mcli util r7z2
mcli get runs
mcli get runs --cluster r7z2

mcli run -f mcloud.yaml --follow
mcli run -f mcloud_eval.yaml --follow
```

For interactive runs you can use:
Expand All @@ -199,8 +219,13 @@ IMPORTANT: Make sure to monitor your jobs and stop the machine when you're done

### Running evaluations

1. Use `infer_tool.py --json > file` to create a jsonl output from a given model/dataset combo, where each line contains two values: **question** and **answer**.
2. Use `eval_tool.py -f file` to evaluate the jsonl file, which will produce an average score for the model on the dataset.
For inference or evaluations, you can use:

```bash
just eval --config_path ultravox/evaluation/configs/eval_config.yaml
```

where `eval_config.yaml` is a config file that specifies the model, datasets, and configurations to use for inference or evaluation. If your dataset is not already defined in ultravox, you need to create a config file for your dataset in `ultravox/data/configs/` (with the appropriate `eval_config` field to specify evaluation metrics and arguments), and register it in `ultravox/data/registry.py`. Please refer to examples in `ultravox/data/configs/`.

## Misc

Expand Down
22 changes: 22 additions & 0 deletions mcloud_eval.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Ultravox eval with vllm configuration
name: ultravox-eval
image: mosaicml/composer:latest
compute:
gpus: 8
cluster: r15z1p1
integrations:
- integration_type: git_repo
git_repo: fixie-ai/ultravox
git_branch: $UV_BRANCH
pip_install: poetry==1.7.1
scheduling:
max_duration: 2 # 2-hour cap to avoid hanging jobs
command: >-
cd ultravox &&
poetry install --no-dev &&
poetry run torchrun --nproc_per_node=8 -m ultravox.evaluation.eval $EVAL_ARGS
env_variables:
MLFLOW_TRACKING_URI: databricks
UV_BRANCH: main
EVAL_ARGS: --config_path ultravox/evaluation/configs/eval_config.yaml
HF_HUB_ENABLE_HF_TRANSFER: 1
22 changes: 22 additions & 0 deletions mcloud_oaieval.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Ultravox eval with vllm configuration
name: ultravox-oaieval
image: mosaicml/composer:latest
compute:
gpus: 8
cluster: r15z1p1
integrations:
- integration_type: git_repo
git_repo: fixie-ai/ultravox
git_branch: $UV_BRANCH
pip_install: poetry==1.7.1
scheduling:
max_duration: 2 # 2-hour cap to avoid hanging jobs
command: >-
cd ultravox &&
poetry install --no-dev &&
poetry run pip install vllm &&
HF_TOKEN=$HF_WRITE_TOKEN poetry run python -m ultravox.inference.run_vllm_inference $EVAL_ARGS
env_variables:
MLFLOW_TRACKING_URI: databricks
UV_BRANCH: main
EVAL_ARGS: --evalset audio-core --model fixie-ai/ultravox-v0_5-llama-3_1-8b
9 changes: 7 additions & 2 deletions mcloud.yaml → mcloud_train.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Ultravox training configuration
name: ultravox
name: ultravox-train
image: mosaicml/composer:latest
compute:
gpus: 8
cluster: r14z3p1
cluster: r15z1p1
integrations:
- integration_type: git_repo
git_repo: fixie-ai/ultravox
Expand All @@ -20,3 +20,8 @@ env_variables:
MLFLOW_TRACKING_URI: databricks
UV_BRANCH: main
TRAIN_ARGS: --config_path ultravox/training/configs/release_config.yaml
HF_HUB_DOWNLOAD_TIMEOUT: "300" # Set timeout to 300 seconds (5 minutes)
HF_HUB_ENABLE_HF_TRANSFER: 1
# RAM Efficient Loading: only the first process loads the pretrained model checkpoint while all other processes have empty
# weights. Only applicable for Transformers. Forcibly sets `sync_module_states` to `True`.
FSDP_CPU_RAM_EFFICIENT_LOADING: 1
Loading