Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-container-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
id-token: write
steps:
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@v6
- name: Inject enhanced GitHub environment variables
uses: rlespinasse/github-slug-action@v5 # https://github.com/rlespinasse/github-slug-action
- name: lowercase IMAGE_REGISTRY
Expand Down
46 changes: 26 additions & 20 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ name: Check Requirements
on:
push:
paths:
- "requirements-linux.txt"
- "requirements-windows.txt"
- "pyproject.toml"
- "uv.lock"

pull_request:
paths:
- "requirements-linux.txt"
- "requirements-windows.txt"
- "pyproject.toml"
- "uv.lock"

jobs:
build:
Expand All @@ -23,29 +23,35 @@ jobs:
TARGET: macOS
- os: windows-latest
TARGET: Windows
python-version: [ "3.10" ]
python-version: [ "3.12" ]

steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v6

- name: Set up Python 3.10
uses: actions/setup-python@v4
- name: Set up Python 3.12
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies (Windows)
if: matrix.TARGET == 'Windows'
- name: Install uv
run: |
python -m pip install --upgrade pip
pip install -r requirements-windows.txt
curl -LsSf https://astral.sh/uv/install.sh | sh
shell: bash

- name: Install dependencies (others)
- name: Add uv to PATH (Unix)
if: matrix.TARGET != 'Windows'
run: |
python -m pip install --upgrade pip
pip install -r requirements-linux.txt

- name: Install gpu dependencies
if: matrix.TARGET != 'macOS'
run: pip3 install torch==2.0.1+cu117 torchvision==0.15.2+cu117 torchaudio==2.0.2+cu117 --index-url https://download.pytorch.org/whl/cu117
run: echo "$HOME/.local/bin" >> $GITHUB_PATH
shell: bash

- name: Add uv to PATH (Windows)
if: matrix.TARGET == 'Windows'
run: echo "$env:USERPROFILE\.local\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
shell: pwsh

- name: Sync dependencies from pyproject.toml
run: uv sync
shell: bash



4 changes: 2 additions & 2 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ jobs:
python-version: ["3.10"]
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v6

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}

Expand Down
25 changes: 10 additions & 15 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,25 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ "3.10" ]
python-version: [ "3.12" ]

steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v6

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies (Windows)
if: matrix.TARGET == 'Windows'
- name: Install uv
run: |
python -m pip install --upgrade pip
pip install -r requirements-windows.txt
#pip install -e .
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.local/bin" >> $GITHUB_PATH

- name: Install dependencies (others)
if: matrix.TARGET != 'Windows'
run: |
python -m pip install --upgrade pip
pip install -r requirements-linux.txt
#pip install -e .
- name: Sync dependencies from pyproject.toml
run: uv sync

- name: Run pytest
run: python -m pytest --import-mode=append pytest/
run: uv run pytest --import-mode=append pytest/

50 changes: 32 additions & 18 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,30 +1,44 @@
FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04
FROM nvidia/cuda:12.8.1-runtime-ubuntu22.04

# note: the python3-pip package contains Python 3.10 on Ubuntu 22.04
# Set timezone and configure non-interactive installation
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC

# Install Python 3.12 from deadsnakes PPA and build tools
RUN apt-get update \
&& apt-get install git python3-pip python3.10-venv ffmpeg -y \
&& apt-get install -y software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa -y \
&& apt-get update \
&& apt-get install -y git python3.12 python3.12-venv python3.12-dev ffmpeg curl tzdata \
build-essential gcc g++ make \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# copy only the requirements file to leverage container image build cache
COPY ./requirements-linux.txt /app/UltraSinger/requirements-linux.txt
WORKDIR /app/UltraSinger
# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"
ENV UV_LINK_MODE=copy

# no need to run as root
RUN chown -R 1000:1000 /app/UltraSinger
USER 1000:1000
# copy pyproject.toml first to leverage container image build cache
COPY ./pyproject.toml /app/UltraSinger/pyproject.toml
# Need to copy some minimal source structure for editable install
RUN mkdir -p /app/UltraSinger/src
WORKDIR /app/UltraSinger

# setup venv
ENV VIRTUAL_ENV=/app/UltraSinger/.venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# Install dependencies from pyproject.toml directly without venv (container is already isolated)
# Using build isolation (without --no-build-isolation) so uv handles all build dependencies automatically
RUN uv pip install --system --python 3.12 -e .

# install dependencies
RUN pip install --no-cache-dir -r requirements-linux.txt \
&& pip install --no-cache-dir torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121 \
&& pip install --no-cache-dir tensorflow[and-cuda]==2.16.1
# Install PyTorch with CUDA support (override the CPU version from pyproject.toml)
RUN uv pip install --system --python 3.12 torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu128 --reinstall

# copy sources late to allow for caching of layers which contain all the dependencies
COPY . /app/UltraSinger


# no need to run as root
RUN chown -R 1000:1000 /app/UltraSinger
USER 1000:1000

WORKDIR /app/UltraSinger/src
CMD ["bash" ]
CMD ["bash"]
58 changes: 19 additions & 39 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,16 @@ This will help me a lot to keep this project alive and improve it.

### Installation

* Install Python 3.10 **(older and newer versions has some breaking changes)**. [Download](https://www.python.org/downloads/)
* Install Python 3.12 **(older/newer versions may have compatibility issues)**. [Download](https://www.python.org/downloads/)
* Also download or install ffmpeg with PATH. [Download](https://www.ffmpeg.org/download.html)
* Go to folder `install` and run install script for your OS.
* Choose `GPU` if you have an nvidia CUDA GPU.
* Choose `CPU` if you don't have an nvidia CUDA GPU.
* Go to folder `install` and run install script for your OS:
* Choose `GPU` if you have an NVIDIA CUDA GPU.
* Choose `CPU` if you don't have an NVIDIA GPU or want CPU-only processing.

### Run

* In root folder just run `run_on_windows.bat`, `run_on_linux.sh` or `run_on_macos.command` to start the app.
* Now you can use the UltraSinger source code with `py UltraSinger.py [opt] [mode] [transcription] [pitcher] [extra]`. See [How to use](#how-to-use) for more information.
* In root folder just run `run_on_windows.bat`, `run_on_linux.sh` or `run_on_mac.command` to start the app.
* Now you can use the UltraSinger source code with `py UltraSinger.py [opt] [mode] [transcription] [pitcher] [extra]`. See [How to use](#-how-to-use-the-app) for more information.

## 📖 How to use the App

Expand Down Expand Up @@ -119,11 +119,6 @@ _Not all options working now!_
--whisper_compute_type Change to "int8" if low on GPU mem (may reduce accuracy) >> ((default) is "float16" for cuda devices, "int8" for cpu)
--keep_numbers Numbers will be transcribed as numerics instead of as words

[pitcher]
# Default is crepe
--crepe tiny|full >> ((default) is full)
--crepe_step_size unit is miliseconds >> ((default) is 10)

[extra]
--disable_hyphenation Disable word hyphenation. Hyphenation is enabled by default.
--disable_separation Disable track separation. Track separation is enabled by default.
Expand All @@ -143,7 +138,6 @@ _Not all options working now!_
[device]
--force_cpu Force all steps to be processed on CPU.
--force_whisper_cpu Only whisper will be forced to cpu
--force_crepe_cpu Only crepe will be forced to cpu
```

For standard use, you only need to use [opt]. All other options are optional.
Expand Down Expand Up @@ -219,14 +213,9 @@ starts at the place or is heard. To disable:

### 👂 Pitcher

Pitching is done with the `crepe` model.
Also consider that a bigger model is more accurate, but also takes longer to pitch.
For just testing you should use `tiny`.
If you want solid accurate, then use the `full` model.

```commandline
-i XYZ --crepe full
```
Pitching is done with the `SwiftF0` model, which is faster and more accurate than CREPE.
SwiftF0 automatically detects pitch frequencies between 46.875 Hz (G1) and 2093.75 Hz (C7).
UltraSinger uses 60 Hz and 400 Hz as the lower and upper frequency limits.

### 👄 Separation

Expand Down Expand Up @@ -285,33 +274,24 @@ this MIDI and sheet are created. And you also want to have accurate files

With a GPU you can speed up the process. Also the quality of the transcription and pitching is better.

You need a cuda device for this to work. Sorry, there is no cuda device for macOS.

It is optional (but recommended) to install the cuda driver for your gpu: see [driver](https://developer.nvidia.com/cuda-downloads).
Install torch with cuda separately in your `venv`. See [tourch+cuda](https://pytorch.org/get-started/locally/).
Also check you GPU cuda support. See [cuda support](https://gist.github.com/standaloneSA/99788f30466516dbcc00338b36ad5acf)
You need an NVIDIA CUDA device for this to work. Sorry, there is no CUDA device for macOS.

Command for `pip`:
```
pip3 install torch==2.0.1+cu117 torchvision==0.15.2+cu117 torchaudio==2.0.2+cu117 --index-url https://download.pytorch.org/whl/cu117
```

When you want to use `conda` instead you need a [different installation command](https://pytorch.org/get-started/locally/).
For GPU support on Windows and Linux, the installation script automatically installs PyTorch with CUDA support.

#### Considerations for Windows users
It is optional (but recommended) to install the CUDA driver for your GPU: see [CUDA driver](https://developer.nvidia.com/cuda-downloads).
Also check your GPU CUDA support. See [CUDA support](https://gist.github.com/standaloneSA/99788f30466516dbcc00338b36ad5acf)

The pitch tracker used by UltraSinger (crepe) uses TensorFlow as its backend.
TensorFlow dropped GPU support for Windows for versions >2.10 as you can see in this [release note](https://github.com/tensorflow/tensorflow/releases/tag/v2.11.1) and their [installation instructions](https://www.tensorflow.org/install/pip#windows-native).

For now UltraSinger runs the latest version available that still supports GPUs on windows.
For manual installation, you can use:
```bash
uv pip install --index-url https://download.pytorch.org/whl/cu128 torch torchvision torchaudio
```

For running later versions of TensorFlow on windows while still taking advantage of GPU support the suggested solution is to [run UltraSinger in a container](container/README.md).
#### Crashes due to low VRAM

If something crashes because of low VRAM then use a smaller model.
If something crashes because of low VRAM then use a smaller Whisper model.
The `large` Whisper model needs more than 8 GB of VRAM!

You can also force cpu usage with the extra option `--force_cpu`.
You can also force CPU usage with the extra option `--force_cpu`.

### 📦 Containerized (Docker or Podman)

Expand Down
3 changes: 3 additions & 0 deletions ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ Date: 2026.02.10
- Support for video as input
- Optimise scale detection
- Added quantization by key
- Changed installer to uv
- Drop crepe for SwiftF0
- Upgrade to Python 3.12

# Version: 0.0.12
Date: 2024.12.19
Expand Down
28 changes: 22 additions & 6 deletions colab/UltraSinger.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,35 @@
"outputs": [],
"source": [
"%cd /content\n",
"\n",
"# Remove existing directory if present\n",
"#!rm -rf UltraSinger\n",
"\n",
"# Clone specific branch\n",
"!git clone https://github.com/rakuri255/UltraSinger.git\n",
"%cd /content/UltraSinger\n",
"!pip install --no-cache-dir -r requirements-linux.txt\n",
"!pip install --no-cache-dir torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121\n",
"!pip install --no-cache-dir tensorflow[and-cuda]==2.17.1\n",
"\n",
"# Install uv\n",
"!curl -LsSf https://astral.sh/uv/install.sh | sh\n",
"\n",
"# Add uv to PATH and configure matplotlib\n",
"import os\n",
"os.environ['PATH'] = f\"/root/.local/bin:{os.environ['PATH']}\"\n",
"os.environ['UV_LINK_MODE'] = 'copy'\n",
"os.environ['MPLBACKEND'] = 'Agg'\n",
"\n",
"# Sync dependencies from pyproject.toml\n",
"!uv sync\n",
"\n",
"# Install PyTorch with CUDA support\n",
"!uv pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu128 --force-reinstall\n",
"\n",
"%cd /content/UltraSinger/src"
]
},
{
"cell_type": "code",
"source": [
"!python UltraSinger.py -i https://www.youtube.com/watch?v=YwNs1Z0qRY0 -o /content/output"
],
"source": "!../.venv/bin/python UltraSinger.py -i https://www.youtube.com/watch?v=YwNs1Z0qRY0 -o /content/output\n",
"metadata": {
"id": "O0j4vUW0YAG2"
},
Expand Down
40 changes: 34 additions & 6 deletions install/CPU/linux_cpu.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,35 @@
#!/bin/bash
cd ..
cd ..
python3.10 -m venv .venv
source .venv/bin/activate
pip install -r requirements-linux.txt
pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1
set -e

cd "$(dirname "$0")"
cd ../..

# Set link mode to copy to avoid hardlink warnings
export UV_LINK_MODE=copy

# Install uv if not already installed
if ! command -v uv &> /dev/null; then
echo "Installing uv..."
curl -LsSf https://astral.sh/uv/install.sh | sh
# Update PATH for current session
export PATH="$HOME/.local/bin:$PATH"
fi

# Verify uv is available
if ! command -v uv &> /dev/null; then
echo "Error: uv could not be found or installed"
echo "Please ensure your shell PATH includes ~/.local/bin"
exit 1
fi

echo "uv version:"
uv --version

echo "Syncing dependencies with uv..."
uv sync --extra linux

echo "Installation completed successfully!"
echo "To run UltraSinger:"
echo " source .venv/bin/activate"
echo " cd src"
echo " python UltraSinger.py"
Loading
Loading