Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
*.json filter=lfs diff=lfs merge=lfs -text
ultravox/assets/** filter=lfs diff=lfs merge=lfs -text
18 changes: 17 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ jobs:
test:
runs-on: ubuntu-latest

# permissions:
# # Gives the action the necessary permissions for publishing new
# # comments in pull requests.
# pull-requests: write
# # Gives the action the necessary permissions for pushing data to the
# # python-coverage-comment-action branch, and for editing existing
# # comments (to avoid publishing multiple comments in the same PR)
# contents: write

steps:
- uses: actions/checkout@v4
with:
Expand All @@ -37,8 +46,15 @@ jobs:
run: |
just check

- name: Run tests
- name: Run tests and coverage
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
just test-verbose

# - name: Coverage comment
# id: coverage_comment
# uses: py-cov-action/python-coverage-comment-action@v3
# with:
# GITHUB_TOKEN: ${{ github.token }}
# COVERAGE_PATH: ultravox
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ output_audio_processor/
output_tokenizer/

*.csv
*.json
epd_eval/
.git/
env_vars.sh
Expand Down Expand Up @@ -191,3 +190,5 @@ mds_output/
mlruns/
output/

saved_tokenizer/
.run_configs/
29 changes: 19 additions & 10 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,27 @@ install:
poetry install

format:
poetry run autoflake ${PROJECT_DIR} --remove-all-unused-imports --quiet --in-place -r --exclude third_party
poetry run isort ${PROJECT_DIR} --force-single-line-imports
poetry run black ${PROJECT_DIR}
poetry run autoflake {{PROJECT_DIR}} --remove-all-unused-imports --quiet --in-place -r
poetry run isort {{PROJECT_DIR}} --force-single-line-imports
poetry run black {{PROJECT_DIR}}

check:
poetry run black ${PROJECT_DIR} --check
poetry run isort ${PROJECT_DIR} --check --force-single-line-imports
poetry run autoflake ${PROJECT_DIR} --check --quiet --remove-all-unused-imports -r --exclude third_party
poetry run mypy ${PROJECT_DIR}
poetry run black {{PROJECT_DIR}} --check
poetry run isort {{PROJECT_DIR}} --check --force-single-line-imports
poetry run autoflake {{PROJECT_DIR}} --check --quiet --remove-all-unused-imports -r
poetry run mypy {{PROJECT_DIR}}

test *ARGS=".":
cd ${PROJECT_DIR} && poetry run pytest --ignore third_party {{ARGS}}
cd ${PROJECT_DIR} && poetry run coverage run --source=${PROJECT_DIR} -m pytest --ignore third_party {{ARGS}}
just print-coverage

test-verbose *ARGS=".":
cd ${PROJECT_DIR} && poetry run pytest --ignore third_party {{ARGS}} -vv --log-cli-level=INFO {{ARGS}}
cd ${PROJECT_DIR} && poetry run coverage run --source=${PROJECT_DIR} -m pytest --ignore third_party {{ARGS}} -vv --log-cli-level=INFO
just print-coverage

# the following assumes the coverage report is already created by the test command
print-coverage *ARGS:
cd ${PROJECT_DIR} && poetry run coverage report --omit "*_test.py" --sort miss {{ARGS}}

@python *FLAGS:
poetry run python {{FLAGS}}
Expand All @@ -44,7 +50,7 @@ infer *FLAGS:
poetry run python -m ultravox.tools.infer_tool {{FLAGS}}

eval *FLAGS:
poetry run python -m ultravox.tools.eval_tool {{FLAGS}}
poetry run python -m ultravox.evaluation.eval {{FLAGS}}

tts *FLAGS:
poetry run python -m ultravox.tools.ds_tool.ds_tool tts {{FLAGS}}
Expand All @@ -61,6 +67,9 @@ gradio *FLAGS:
run *FLAGS:
poetry run mcli run -f mcloud.yaml --follow {{FLAGS}}

vllm_eval *FLAGS:
poetry run mcli run -f mcloud_eval.yaml --follow {{FLAGS}}

mcloud *FLAGS:
poetry run mcli interactive {{FLAGS}} --cluster ${MCLOUD_CLUSTER} --instance ${MCLOUD_INSTANCE} --name `whoami` --command "bash -c \"$(cat setup.sh)\""

Expand Down
59 changes: 42 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ Ultravox can be trained against any open-weight model. See below for more detail

See Ultravox in action on our [demo page](https://demo.ultravox.ai). You can build your own voice-to-voice agents on our Realtime platform at ultravox.ai.

You can run the Gradio demo locally with `just gradio`. You can run the demo in "voice mode", which allows natural audio conversations with Ultravox, by running `just gradio --voice_mode=True`.

### Discord

Expand Down Expand Up @@ -76,13 +75,28 @@ brew update
brew install just
```

Create a Python virtual environment and install the necessary packages:
It's recommended to use pyenv for managing environments due to the use of Poetry:

```bash
brew install xz
brew install pyenv
pyenv init
pyenv install 3.11
pyenv global 3.11

# Optional
pyenv shell 3.11
```

>**Note**: Use of conda is NOT recommended with Poetry

After creating a virtual environment, install required packages using `just` and `poetry`:

```bash
just install
```

We're using Poetry to manage the Python virtual environment.
We're using Poetry to manage the Python virtual environment. You can observe your environment with `poetry env info`.

### Mosaic Environment Setup (Fixie Internal)

Expand All @@ -103,14 +117,11 @@ mcli set api-key <new-value>
```bash
# Hugging Face token for accessing gated data and models
mcli create secret env HF_TOKEN=hf_<your_token>
mcli create secret env HF_WRITE_TOKEN=hf_<your_token_with_write_access>

# WandB token for logging experiments
mcli create secret env WANDB_PROJECT=ultravox
mcli create secret env WANDB_API_KEY=<your_wandb_key>

# GCP credentials for accessing data (e.g. BoolQ)
# Get service_account.json file from Justin/Farzad and put it in the root dir, then
mcli create secret gcp
```

## Training
Expand Down Expand Up @@ -167,15 +178,24 @@ See [`configs_base.py`](ultravox/training/config_base.py) to find the parameters

### MosaicML Training (Fixie Internal)

Before running any training jobs, you need to setup your SSH key in the Mosaic Platform: https://docs.mosaicml.com/projects/mcli/en/latest/resources/secrets/ssh.html#page-secrets-ssh
Before running any training jobs, set up [SSH authentication with MosaicML](https://docs.mosaicml.com/projects/mcli/en/latest/resources/secrets/ssh.html#page-secrets-ssh):

```bash
## Create a new SSH key and add it to the Mosaic Platform
# ssh-keygen -f ~/.ssh/mclid_id_rsa
## add the **public** key to GitHub
# mcli create secret ssh ~/.ssh/mclid_id_rsa
1. Generate an SSH key:
```bash
ssh-keygen -f ~/.ssh/mclid_id_rsa
```

2. Add the public key to your GitHub account

mcli run -f mcloud.yaml --follow
3. Upload the private key to MosaicML (this allows MosaicML to clone the repository and run jobs):
```bash
mcli create secret git-ssh ~/.ssh/mclid_id_rsa
```

Then you can run the following command to kick off a training job:

```bash
mcli run -f mcloud_train.yaml --follow
```

Other useful commands:
Expand All @@ -187,7 +207,7 @@ mcli util r7z2
mcli get runs
mcli get runs --cluster r7z2

mcli run -f mcloud.yaml --follow
mcli run -f mcloud_eval.yaml --follow
```

For interactive runs you can use:
Expand All @@ -199,8 +219,13 @@ IMPORTANT: Make sure to monitor your jobs and stop the machine when you're done

### Running evaluations

1. Use `infer_tool.py --json > file` to create a jsonl output from a given model/dataset combo, where each line contains two values: **question** and **answer**.
2. Use `eval_tool.py -f file` to evaluate the jsonl file, which will produce an average score for the model on the dataset.
For inference or evaluations, you can use:

```bash
just eval --config_path ultravox/evaluation/configs/eval_config.yaml
```

where `eval_config.yaml` is a config file that specifies the model, datasets, and configurations to use for inference or evaluation. If your dataset is not already defined in ultravox, you need to create a config file for your dataset in `ultravox/data/configs/` (with the appropriate `eval_config` field to specify evaluation metrics and arguments), and register it in `ultravox/data/registry.py`. Please refer to examples in `ultravox/data/configs/`.

## Misc

Expand Down
22 changes: 22 additions & 0 deletions mcloud_eval.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Ultravox eval with vllm configuration
name: ultravox-eval
image: mosaicml/composer:latest
compute:
gpus: 8
cluster: r15z1p1
integrations:
- integration_type: git_repo
git_repo: fixie-ai/ultravox
git_branch: $UV_BRANCH
pip_install: poetry==1.7.1
scheduling:
max_duration: 2 # 2-hour cap to avoid hanging jobs
command: >-
cd ultravox &&
poetry install --no-dev &&
poetry run torchrun --nproc_per_node=8 -m ultravox.evaluation.eval $EVAL_ARGS
env_variables:
MLFLOW_TRACKING_URI: databricks
UV_BRANCH: main
EVAL_ARGS: --config_path ultravox/evaluation/configs/eval_config.yaml
HF_HUB_ENABLE_HF_TRANSFER: 1
22 changes: 22 additions & 0 deletions mcloud_oaieval.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Ultravox eval with vllm configuration
name: ultravox-oaieval
image: mosaicml/composer:latest
compute:
gpus: 8
cluster: r15z1p1
integrations:
- integration_type: git_repo
git_repo: fixie-ai/ultravox
git_branch: $UV_BRANCH
pip_install: poetry==1.7.1
scheduling:
max_duration: 2 # 2-hour cap to avoid hanging jobs
command: >-
cd ultravox &&
poetry install --no-dev &&
poetry run pip install vllm &&
HF_TOKEN=$HF_WRITE_TOKEN poetry run python -m ultravox.inference.run_vllm_inference $EVAL_ARGS
env_variables:
MLFLOW_TRACKING_URI: databricks
UV_BRANCH: main
EVAL_ARGS: --evalset audio-core --model fixie-ai/ultravox-v0_5-llama-3_1-8b
9 changes: 7 additions & 2 deletions mcloud.yaml → mcloud_train.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Ultravox training configuration
name: ultravox
name: ultravox-train
image: mosaicml/composer:latest
compute:
gpus: 8
cluster: r14z3p1
cluster: r15z1p1
integrations:
- integration_type: git_repo
git_repo: fixie-ai/ultravox
Expand All @@ -20,3 +20,8 @@ env_variables:
MLFLOW_TRACKING_URI: databricks
UV_BRANCH: main
TRAIN_ARGS: --config_path ultravox/training/configs/release_config.yaml
HF_HUB_DOWNLOAD_TIMEOUT: "300" # Set timeout to 300 seconds (5 minutes)
HF_HUB_ENABLE_HF_TRANSFER: 1
# RAM Efficient Loading: only the first process loads the pretrained model checkpoint while all other processes have empty
# weights. Only applicable for Transformers. Forcibly sets `sync_module_states` to `True`.
FSDP_CPU_RAM_EFFICIENT_LOADING: 1
Loading