diff --git a/.github/workflows/build-container-image.yml b/.github/workflows/build-container-image.yml index 838679a6..c476210f 100644 --- a/.github/workflows/build-container-image.yml +++ b/.github/workflows/build-container-image.yml @@ -19,7 +19,7 @@ jobs: id-token: write steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Inject enhanced GitHub environment variables uses: rlespinasse/github-slug-action@v5 # https://github.com/rlespinasse/github-slug-action - name: lowercase IMAGE_REGISTRY diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a6926f72..309505a4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -3,13 +3,13 @@ name: Check Requirements on: push: paths: - - "requirements-linux.txt" - - "requirements-windows.txt" + - "pyproject.toml" + - "uv.lock" pull_request: paths: - - "requirements-linux.txt" - - "requirements-windows.txt" + - "pyproject.toml" + - "uv.lock" jobs: build: @@ -23,29 +23,35 @@ jobs: TARGET: macOS - os: windows-latest TARGET: Windows - python-version: [ "3.10" ] + python-version: [ "3.12" ] steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v6 - - name: Set up Python 3.10 - uses: actions/setup-python@v4 + - name: Set up Python 3.12 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies (Windows) - if: matrix.TARGET == 'Windows' + - name: Install uv run: | - python -m pip install --upgrade pip - pip install -r requirements-windows.txt + curl -LsSf https://astral.sh/uv/install.sh | sh + shell: bash - - name: Install dependencies (others) + - name: Add uv to PATH (Unix) if: matrix.TARGET != 'Windows' - run: | - python -m pip install --upgrade pip - pip install -r requirements-linux.txt - - - name: Install gpu dependencies - if: matrix.TARGET != 'macOS' - run: pip3 install torch==2.0.1+cu117 torchvision==0.15.2+cu117 torchaudio==2.0.2+cu117 --index-url 
https://download.pytorch.org/whl/cu117 + run: echo "$HOME/.local/bin" >> $GITHUB_PATH + shell: bash + + - name: Add uv to PATH (Windows) + if: matrix.TARGET == 'Windows' + run: echo "$env:USERPROFILE\.local\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + shell: pwsh + + - name: Sync dependencies from pyproject.toml + run: uv sync + shell: bash + + + diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index bdaa706f..e2143b67 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -14,10 +14,10 @@ jobs: python-version: ["3.10"] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 811ae450..9a3e430d 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -7,30 +7,25 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ "3.10" ] + python-version: [ "3.12" ] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies (Windows) - if: matrix.TARGET == 'Windows' + - name: Install uv run: | - python -m pip install --upgrade pip - pip install -r requirements-windows.txt - #pip install -e . + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> $GITHUB_PATH - - name: Install dependencies (others) - if: matrix.TARGET != 'Windows' - run: | - python -m pip install --upgrade pip - pip install -r requirements-linux.txt - #pip install -e . 
+ - name: Sync dependencies from pyproject.toml + run: uv sync - name: Run pytest - run: python -m pytest --import-mode=append pytest/ + run: uv run pytest --import-mode=append pytest/ + diff --git a/Dockerfile b/Dockerfile index b2faa16f..01e4f9f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,30 +1,44 @@ -FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04 +FROM nvidia/cuda:12.8.1-runtime-ubuntu22.04 -# note: the python3-pip package contains Python 3.10 on Ubuntu 22.04 +# Set timezone and configure non-interactive installation +ENV DEBIAN_FRONTEND=noninteractive +ENV TZ=UTC + +# Install Python 3.12 from deadsnakes PPA and build tools RUN apt-get update \ - && apt-get install git python3-pip python3.10-venv ffmpeg -y \ + && apt-get install -y software-properties-common \ + && add-apt-repository ppa:deadsnakes/ppa -y \ + && apt-get update \ + && apt-get install -y git python3.12 python3.12-venv python3.12-dev ffmpeg curl tzdata \ + build-essential gcc g++ make \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# copy only the requirements file to leverage container image build cache -COPY ./requirements-linux.txt /app/UltraSinger/requirements-linux.txt -WORKDIR /app/UltraSinger +# Install uv +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +ENV PATH="/root/.local/bin:$PATH" +ENV UV_LINK_MODE=copy -# no need to run as root -RUN chown -R 1000:1000 /app/UltraSinger -USER 1000:1000 +# copy pyproject.toml first to leverage container image build cache +COPY ./pyproject.toml /app/UltraSinger/pyproject.toml +# Need to copy some minimal source structure for editable install +RUN mkdir -p /app/UltraSinger/src +WORKDIR /app/UltraSinger -# setup venv -ENV VIRTUAL_ENV=/app/UltraSinger/.venv -RUN python3 -m venv $VIRTUAL_ENV -ENV PATH="$VIRTUAL_ENV/bin:$PATH" +# Install dependencies from pyproject.toml directly without venv (container is already isolated) +# Using build isolation (without --no-build-isolation) so uv handles all build dependencies automatically +RUN uv pip install 
--system --python 3.12 -e . -# install dependencies -RUN pip install --no-cache-dir -r requirements-linux.txt \ - && pip install --no-cache-dir torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121 \ - && pip install --no-cache-dir tensorflow[and-cuda]==2.16.1 +# Install PyTorch with CUDA support (override the CPU version from pyproject.toml) +RUN uv pip install --system --python 3.12 torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu128 --reinstall # copy sources late to allow for caching of layers which contain all the dependencies COPY . /app/UltraSinger + + +# no need to run as root +RUN chown -R 1000:1000 /app/UltraSinger +USER 1000:1000 + WORKDIR /app/UltraSinger/src -CMD ["bash" ] +CMD ["bash"] diff --git a/README.md b/README.md index 38881f67..5e852ce7 100644 --- a/README.md +++ b/README.md @@ -69,16 +69,16 @@ This will help me a lot to keep this project alive and improve it. ### Installation -* Install Python 3.10 **(older and newer versions has some breaking changes)**. [Download](https://www.python.org/downloads/) +* Install Python 3.12 **(older/newer versions may have compatibility issues)**. [Download](https://www.python.org/downloads/) * Also download or install ffmpeg with PATH. [Download](https://www.ffmpeg.org/download.html) -* Go to folder `install` and run install script for your OS. - * Choose `GPU` if you have an nvidia CUDA GPU. - * Choose `CPU` if you don't have an nvidia CUDA GPU. +* Go to folder `install` and run install script for your OS: + * Choose `GPU` if you have an NVIDIA CUDA GPU. + * Choose `CPU` if you don't have an NVIDIA GPU or want CPU-only processing. ### Run -* In root folder just run `run_on_windows.bat`, `run_on_linux.sh` or `run_on_macos.command` to start the app. -* Now you can use the UltraSinger source code with `py UltraSinger.py [opt] [mode] [transcription] [pitcher] [extra]`. 
See [How to use](#how-to-use) for more information. +* In root folder just run `run_on_windows.bat`, `run_on_linux.sh` or `run_on_mac.command` to start the app. +* Now you can use the UltraSinger source code with `py UltraSinger.py [opt] [mode] [transcription] [pitcher] [extra]`. See [How to use](#-how-to-use-the-app) for more information. ## 📖 How to use the App @@ -119,11 +119,6 @@ _Not all options working now!_ --whisper_compute_type Change to "int8" if low on GPU mem (may reduce accuracy) >> ((default) is "float16" for cuda devices, "int8" for cpu) --keep_numbers Numbers will be transcribed as numerics instead of as words - [pitcher] - # Default is crepe - --crepe tiny|full >> ((default) is full) - --crepe_step_size unit is miliseconds >> ((default) is 10) - [extra] --disable_hyphenation Disable word hyphenation. Hyphenation is enabled by default. --disable_separation Disable track separation. Track separation is enabled by default. @@ -143,7 +138,6 @@ _Not all options working now!_ [device] --force_cpu Force all steps to be processed on CPU. --force_whisper_cpu Only whisper will be forced to cpu - --force_crepe_cpu Only crepe will be forced to cpu ``` For standard use, you only need to use [opt]. All other options are optional. @@ -219,14 +213,9 @@ starts at the place or is heard. To disable: ### 👂 Pitcher -Pitching is done with the `crepe` model. -Also consider that a bigger model is more accurate, but also takes longer to pitch. -For just testing you should use `tiny`. -If you want solid accurate, then use the `full` model. - -```commandline --i XYZ --crepe full -``` +Pitching is done with the `SwiftF0` model, which is faster and more accurate than CREPE. +SwiftF0 automatically detects pitch frequencies between 46.875 Hz (G1) and 2093.75 Hz (C7). +UltraSinger uses 60hz and 400hz ### 👄 Separation @@ -285,33 +274,24 @@ this MIDI and sheet are created. And you also want to have accurate files With a GPU you can speed up the process. 
Also the quality of the transcription and pitching is better. -You need a cuda device for this to work. Sorry, there is no cuda device for macOS. - -It is optional (but recommended) to install the cuda driver for your gpu: see [driver](https://developer.nvidia.com/cuda-downloads). -Install torch with cuda separately in your `venv`. See [tourch+cuda](https://pytorch.org/get-started/locally/). -Also check you GPU cuda support. See [cuda support](https://gist.github.com/standaloneSA/99788f30466516dbcc00338b36ad5acf) +You need an NVIDIA CUDA device for this to work. Sorry, there is no CUDA device for macOS. -Command for `pip`: -``` -pip3 install torch==2.0.1+cu117 torchvision==0.15.2+cu117 torchaudio==2.0.2+cu117 --index-url https://download.pytorch.org/whl/cu117 -``` - -When you want to use `conda` instead you need a [different installation command](https://pytorch.org/get-started/locally/). +For GPU support on Windows and Linux, the installation script automatically installs PyTorch with CUDA support. -#### Considerations for Windows users +It is optional (but recommended) to install the CUDA driver for your GPU: see [CUDA driver](https://developer.nvidia.com/cuda-downloads). +Also check your GPU CUDA support. See [CUDA support](https://gist.github.com/standaloneSA/99788f30466516dbcc00338b36ad5acf) -The pitch tracker used by UltraSinger (crepe) uses TensorFlow as its backend. -TensorFlow dropped GPU support for Windows for versions >2.10 as you can see in this [release note](https://github.com/tensorflow/tensorflow/releases/tag/v2.11.1) and their [installation instructions](https://www.tensorflow.org/install/pip#windows-native). - -For now UltraSinger runs the latest version available that still supports GPUs on windows. 
+For manual installation, you can use: +```bash +uv pip install --index-url https://download.pytorch.org/whl/cu121 torch torchvision torchaudio +``` -For running later versions of TensorFlow on windows while still taking advantage of GPU support the suggested solution is to [run UltraSinger in a container](container/README.md). #### Crashes due to low VRAM -If something crashes because of low VRAM then use a smaller model. +If something crashes because of low VRAM then use a smaller Whisper model. Whisper needs more than 8GB VRAM in the `large` model! -You can also force cpu usage with the extra option `--force_cpu`. +You can also force CPU usage with the extra option `--force_cpu`. ### 📦 Containerized (Docker or Podman) diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 16530c87..d282a6f6 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -14,6 +14,9 @@ Date: 2026.02.10 - Support for video as input - Optimise scale detection - Added quantization by key + - Changed installer to uv + - Drop crepe for SwiftF0 + - upgrade to python 3.12 # Version: 0.0.12 Date: 2024.12.19 diff --git a/colab/UltraSinger.ipynb b/colab/UltraSinger.ipynb index 010891fd..5b3aec0d 100644 --- a/colab/UltraSinger.ipynb +++ b/colab/UltraSinger.ipynb @@ -25,19 +25,35 @@ "outputs": [], "source": [ "%cd /content\n", + "\n", + "# Remove existing directory if present\n", + "#!rm -rf UltraSinger\n", + "\n", + "# Clone specific branch\n", "!git clone https://github.com/rakuri255/UltraSinger.git\n", "%cd /content/UltraSinger\n", - "!pip install --no-cache-dir -r requirements-linux.txt\n", - "!pip install --no-cache-dir torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121\n", - "!pip install --no-cache-dir tensorflow[and-cuda]==2.17.1\n", + "\n", + "# Install uv\n", + "!curl -LsSf https://astral.sh/uv/install.sh | sh\n", + "\n", + "# Add uv to PATH and configure matplotlib\n", + "import os\n", + "os.environ['PATH'] = 
f\"/root/.local/bin:{os.environ['PATH']}\"\n", + "os.environ['UV_LINK_MODE'] = 'copy'\n", + "os.environ['MPLBACKEND'] = 'Agg'\n", + "\n", + "# Sync dependencies from pyproject.toml\n", + "!uv sync\n", + "\n", + "# Install PyTorch with CUDA support\n", + "!uv pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu128 --force-reinstall\n", + "\n", "%cd /content/UltraSinger/src" ] }, { "cell_type": "code", - "source": [ - "!python UltraSinger.py -i https://www.youtube.com/watch?v=YwNs1Z0qRY0 -o /content/output" - ], + "source": "!../.venv/bin/python UltraSinger.py -i https://www.youtube.com/watch?v=YwNs1Z0qRY0 -o /content/output\n", "metadata": { "id": "O0j4vUW0YAG2" }, diff --git a/install/CPU/linux_cpu.sh b/install/CPU/linux_cpu.sh index 20f15729..4d251c48 100644 --- a/install/CPU/linux_cpu.sh +++ b/install/CPU/linux_cpu.sh @@ -1,7 +1,35 @@ #!/bin/bash -cd .. -cd .. -python3.10 -m venv .venv -source .venv/bin/activate -pip install -r requirements-linux.txt -pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 +set -e + +cd "$(dirname "$0")" +cd ../.. + +# Set link mode to copy to avoid hardlink warnings +export UV_LINK_MODE=copy + +# Install uv if not already installed +if ! command -v uv &> /dev/null; then + echo "Installing uv..." + curl -LsSf https://astral.sh/uv/install.sh | sh + # Update PATH for current session + export PATH="$HOME/.local/bin:$PATH" +fi + +# Verify uv is available +if ! command -v uv &> /dev/null; then + echo "Error: uv could not be found or installed" + echo "Please ensure your shell PATH includes ~/.local/bin" + exit 1 +fi + +echo "uv version:" +uv --version + +echo "Syncing dependencies with uv..." +uv sync --extra linux + +echo "Installation completed successfully!" 
+echo "To run UltraSinger:"
+echo "  source .venv/bin/activate"
+echo "  cd src"
+echo "  python UltraSinger.py"
diff --git a/install/CPU/macos_cpu.sh b/install/CPU/macos_cpu.sh
index d21a3b2e..7dc335c1 100755
--- a/install/CPU/macos_cpu.sh
+++ b/install/CPU/macos_cpu.sh
@@ -1,7 +1,35 @@
 #!/bin/bash
-cd ..
-cd ..
-python3.10 -m venv .venv
-source .venv/bin/activate
-pip install -r requirements-macos.txt
-pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1
+set -e
+
+cd "$(dirname "$0")"
+cd ../..
+
+# Set link mode to copy to avoid hardlink warnings
+export UV_LINK_MODE=copy
+
+# Install uv if not already installed
+if ! command -v uv &> /dev/null; then
+    echo "Installing uv..."
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+    # Update PATH for current session
+    export PATH="$HOME/.local/bin:$PATH"
+fi
+
+# Verify uv is available
+if ! command -v uv &> /dev/null; then
+    echo "Error: uv could not be found or installed"
+    echo "Please ensure your shell PATH includes ~/.local/bin"
+    exit 1
+fi
+
+echo "uv version:"
+uv --version
+
+echo "Syncing dependencies with uv..."
+uv sync --extra macos
+
+echo "Installation completed successfully!"
+echo "To run UltraSinger:"
+echo "  source .venv/bin/activate"
+echo "  cd src"
+echo "  python UltraSinger.py"
diff --git a/install/CPU/windows_cpu.bat b/install/CPU/windows_cpu.bat
index 839845e1..bc0b055e 100644
--- a/install/CPU/windows_cpu.bat
+++ b/install/CPU/windows_cpu.bat
@@ -1,9 +1,97 @@
 @echo off
-setlocal
-cd ..
-cd ..
-py -3.10 -m venv .venv
-SET VenvPythonPath=%CD%\.venv\Scripts\python.exe
-call %VenvPythonPath% -m pip install -r requirements-windows.txt
-call %VenvPythonPath% -m pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1
-endlocal
\ No newline at end of file
+setlocal enabledelayedexpansion
+
+:: Set link mode to copy to avoid hardlink warnings on different filesystems
+set UV_LINK_MODE=copy
+
+:: Navigate to project root
+pushd "%~dp0"
+cd /d ..\..
+echo Current directory: %cd% + +:: Update PATH to include uv installation directory +set "PATH=%USERPROFILE%\.local\bin;!PATH!" + +:: Remove old virtual environment if it exists to force recreation with correct Python version +::if exist .venv ( +:: echo Removing old virtual environment... +:: rmdir /s /q .venv +::) + +:: First, find Python using to get full path +set "PYTHON_EXE=" + +for %%V in (3.12) do ( + py -%%V --version >nul 2>&1 + if !errorlevel! equ 0 ( + :: Get the full path to the Python executable + for /f "delims=" %%P in ('py -%%V -c "import sys; print(sys.executable)"') do ( + set "PYTHON_EXE=%%P" + ) + goto :found_python + ) +) + +:: Fallback to direct Python installations +for %%P in (python3.12 python3 python) do ( + where %%P >nul 2>&1 + if !errorlevel! equ 0 ( + set "PYTHON_EXE=%%P" + goto :found_python + ) +) + +:found_python +if "!PYTHON_EXE!"=="" ( + echo Error: No Python 3.12 installation found + echo Please install Python 3.12 from python.org + echo Note: Python 3.13 is not yet supported due to dependency constraints + pause + exit /b 1 +) + +echo Using Python: !PYTHON_EXE! +!PYTHON_EXE! --version + +:: Install uv if not already installed +where uv >nul 2>&1 +if !errorlevel! neq 0 ( + echo Installing uv... + powershell -NoProfile -Command "irm https://astral.sh/uv/install.ps1 | iex" +) + +:: Wait a moment for uv to be available +timeout /t 2 /nobreak >nul + +:: Verify uv is available +where uv >nul 2>&1 +if !errorlevel! neq 0 ( + echo Error: uv could not be found or installed + pause + exit /b 1 +) + +echo uv is ready +uv --version + +echo Syncing dependencies with uv... +uv sync --extra windows --python !PYTHON_EXE! +if !errorlevel! neq 0 ( + echo Error during uv sync + pause + exit /b 1 +) + +echo Installing PyTorch CPU version... 
+:: First remove any existing torch installation to avoid RECORD file issues +uv pip uninstall torch torchvision torchaudio -y 2>nul +:: Install PyTorch CPU version +uv pip install --index-url https://download.pytorch.org/whl/cpu torch torchvision torchaudio +if !errorlevel! neq 0 ( + echo Error during PyTorch installation + pause + exit /b 1 +) + +echo Installation completed successfully! +pause \ No newline at end of file diff --git a/install/CUDA/linux_cuda_gpu.sh b/install/CUDA/linux_cuda_gpu.sh index ced7a2bc..83e153aa 100644 --- a/install/CUDA/linux_cuda_gpu.sh +++ b/install/CUDA/linux_cuda_gpu.sh @@ -1,7 +1,40 @@ #!/bin/bash -cd .. -cd .. -python3.10 -m venv .venv -source .venv/bin/activate -pip install -r requirements-linux.txt -pip install torch==2.3.1+cu118 torchvision==0.18.1+cu118 torchaudio==2.3.1+cu118 --index-url https://download.pytorch.org/whl/cu118 +set -e + +cd "$(dirname "$0")" +cd ../.. + +# Set link mode to copy to avoid hardlink warnings on different filesystems +export UV_LINK_MODE=copy + +# Install uv if not already installed +if ! command -v uv &> /dev/null; then + echo "Installing uv..." + curl -LsSf https://astral.sh/uv/install.sh | sh + # Update PATH for current session + export PATH="$HOME/.local/bin:$PATH" +fi + +# Verify uv is available +if ! command -v uv &> /dev/null; then + echo "Error: uv could not be found or installed" + echo "Please ensure your shell PATH includes ~/.local/bin" + exit 1 +fi + +echo "uv version:" +uv --version + +# Sync dependencies from pyproject.toml +echo "Syncing dependencies from pyproject.toml..." +uv sync --extra linux + +# Install PyTorch with CUDA support +echo "Installing PyTorch with CUDA support..." +uv pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu128 --force-reinstall + +echo "Installation completed successfully!" 
+echo "To run UltraSinger:" +echo " source .venv/bin/activate" +echo " cd src" +echo " python UltraSinger.py" diff --git a/install/CUDA/windows_cuda_gpu.bat b/install/CUDA/windows_cuda_gpu.bat index 71200958..fb129b02 100644 --- a/install/CUDA/windows_cuda_gpu.bat +++ b/install/CUDA/windows_cuda_gpu.bat @@ -1,9 +1,98 @@ @echo off -setlocal -cd .. -cd .. -py -3.10 -m venv .venv -SET VenvPythonPath=%CD%\.venv\Scripts\python.exe -call %VenvPythonPath% -m pip install -r requirements-windows.txt -call %VenvPythonPath% -m pip install torch==2.3.1+cu118 torchvision==0.18.1+cu118 torchaudio==2.3.1+cu118 --index-url https://download.pytorch.org/whl/cu118 -endlocal \ No newline at end of file +setlocal enabledelayedexpansion + +:: Set link mode to copy to avoid hardlink warnings on different filesystems +set UV_LINK_MODE=copy + +:: Navigate to project root +pushd "%~dp0" +cd /d ..\.. +echo Current directory: %cd% + +:: Update PATH to include uv installation directory +set "PATH=%USERPROFILE%\.local\bin;!PATH!" + +:: Remove old virtual environment if it exists to force recreation with correct Python version +::if exist .venv ( +:: echo Removing old virtual environment... +:: rmdir /s /q .venv +::) + +:: First, find Python using to get full path +set "PYTHON_EXE=" + +for %%V in (3.12) do ( + py -%%V --version >nul 2>&1 + if !errorlevel! equ 0 ( + :: Get the full path to the Python executable + for /f "delims=" %%P in ('py -%%V -c "import sys; print(sys.executable)"') do ( + set "PYTHON_EXE=%%P" + ) + goto :found_python + ) +) + +:: Fallback to direct Python installations +for %%P in (python3.12 python3 python) do ( + where %%P >nul 2>&1 + if !errorlevel! 
equ 0 ( + set "PYTHON_EXE=%%P" + goto :found_python + ) +) + +:found_python +if "!PYTHON_EXE!"=="" ( + echo Error: No Python 3.12 installation found + echo Please install Python 3.12 from python.org + echo Note: Python 3.13 is not yet supported due to dependency constraints + pause + exit /b 1 +) + +echo Using Python: !PYTHON_EXE! +!PYTHON_EXE! --version + +:: Install uv if not already installed +where uv >nul 2>&1 +if !errorlevel! neq 0 ( + echo Installing uv... + powershell -NoProfile -Command "irm https://astral.sh/uv/install.ps1 | iex" +) + +:: Wait a moment for uv to be available +timeout /t 2 /nobreak >nul + +:: Verify uv is available +where uv >nul 2>&1 +if !errorlevel! neq 0 ( + echo Error: uv could not be found or installed + pause + exit /b 1 +) + +echo uv is ready +uv --version + +echo Syncing dependencies with uv... +uv sync --extra windows --python !PYTHON_EXE! +if !errorlevel! neq 0 ( + echo Error during uv sync + pause + exit /b 1 +) + +echo Installing PyTorch with CUDA support (as recommended by WhisperX)... +:: First remove any existing torch installation to avoid RECORD file issues +uv pip uninstall torch torchvision torchaudio -y 2>nul +:: Install PyTorch 2.8.0 with CUDA for WhisperX compatibility +:: This version includes cuDNN 9.x which is compatible with the latest PyTorch +uv pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu128 --force-reinstall +if !errorlevel! neq 0 ( + echo Error during PyTorch installation + pause + exit /b 1 +) + +echo Installation completed successfully! +pause diff --git a/install/fix_cuda11_libs.bat b/install/fix_cuda11_libs.bat new file mode 100644 index 00000000..22f4dbff --- /dev/null +++ b/install/fix_cuda11_libs.bat @@ -0,0 +1,100 @@ +@echo off +echo ======================================== +echo CUDA 11.x Libraries Fix for WhisperX/Pyannote +echo ======================================== +echo. 
+echo This script will download and install CUDA 11.x DLLs +echo that are required by pyannote.audio (used by WhisperX) +echo. +echo PyTorch 2.8.0 includes CUDA 12.x, but pyannote.audio +echo still requires CUDA 11.x DLLs to function properly. +echo. + +cd /d "%~dp0\.." + +echo Checking for virtual environment... +if not exist ".venv\Scripts\activate.bat" ( + echo Error: Virtual environment not found at .venv + echo Please create the virtual environment first. + pause + exit /b 1 +) + +echo Activating virtual environment... +call .venv\Scripts\activate.bat + +if %errorlevel% neq 0 ( + echo Error activating virtual environment + pause + exit /b 1 +) + +echo Creating temporary directory... +if not exist temp_cuda11 mkdir temp_cuda11 +cd temp_cuda11 + +echo. +echo Downloading CUDA 11.8 Toolkit Libraries (Windows)... +echo This may take a few minutes... +echo. +echo Downloading cuBLAS 11.x... +powershell -Command "& {[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; Invoke-WebRequest -Uri 'https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.11.3.6-archive.zip' -OutFile 'cublas.zip'}" + +if %errorlevel% neq 0 ( + echo Error downloading cuBLAS 11.x + echo. + echo Please check your internet connection or download manually from: + echo https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/ + echo. + cd .. + deactivate + pause + exit /b 1 +) + +echo. +echo Extracting CUDA 11.x Libraries... +if exist cublas.zip ( + powershell -Command "Expand-Archive -Path 'cublas.zip' -DestinationPath '.' -Force" +) + +echo. +echo Copying CUDA 11.x DLLs to PyTorch installation... 
+set "COPIED=0" + +if exist "libcublas-windows-x86_64-11.11.3.6-archive\bin\*.dll" ( + copy /Y "libcublas-windows-x86_64-11.11.3.6-archive\bin\*.dll" "%VIRTUAL_ENV%\Lib\site-packages\torch\lib\" + set "COPIED=1" +) + +if "%COPIED%"=="0" ( + echo Error: Could not find CUDA 11.x DLLs in extracted files + echo Expected path: libcublas-windows-x86_64-11.11.3.6-archive\bin\ + cd .. + deactivate + pause + exit /b 1 +) + +echo. +echo Successfully copied CUDA 11.x DLLs! +echo Listing copied cuBLAS files: +dir "%VIRTUAL_ENV%\Lib\site-packages\torch\lib\cublas*.dll" /b 2>nul + +echo. +echo Cleaning up temporary files... +cd .. +rmdir /s /q temp_cuda11 + +echo Deactivating virtual environment... +deactivate + +echo. +echo ======================================== +echo CUDA 11.x Fix completed successfully! +echo ======================================== +echo. +echo You can now run UltraSinger without CUDA 11.x DLL errors. +echo. +pause + diff --git a/install/fix_cudnn8.bat b/install/fix_cudnn8.bat new file mode 100644 index 00000000..2f265a1b --- /dev/null +++ b/install/fix_cudnn8.bat @@ -0,0 +1,102 @@ +@echo off +echo ======================================== +echo cuDNN 8.x Fix for WhisperX/Pyannote +echo ======================================== +echo. +echo This script will download and install cuDNN 8.x DLLs +echo that are required by pyannote.audio (used by WhisperX) +echo. +echo PyTorch 2.8.0 includes cuDNN 9.x, but pyannote.audio +echo still requires cuDNN 8.x DLLs to function properly. +echo. + +cd /d "%~dp0\.." + +echo Checking for virtual environment... +if not exist ".venv\Scripts\activate.bat" ( + echo Error: Virtual environment not found at .venv + echo Please create the virtual environment first. + pause + exit /b 1 +) + +echo Activating virtual environment... +call .venv\Scripts\activate.bat + +if %errorlevel% neq 0 ( + echo Error activating virtual environment + pause + exit /b 1 +) + +echo Creating temporary directory... 
+if not exist temp_cudnn mkdir temp_cudnn +cd temp_cudnn + +echo. +echo Downloading cuDNN 8.9.7 for CUDA 11.x (Windows)... +echo This may take a few minutes... +powershell -Command "& {[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; Invoke-WebRequest -Uri 'https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-8.9.7.29_cuda11-archive.zip' -OutFile 'cudnn.zip'}" + +if %errorlevel% neq 0 ( + echo Error downloading cuDNN 8.9.7 + echo. + echo Alternative: Trying to extract from installed torch packages... + echo Searching for existing PyTorch installations with cuDNN 8.x... + + REM Try to find cuDNN 8 DLLs in pip cache or other Python installations + powershell -Command "& {$found = $false; Get-ChildItem -Path $env:LOCALAPPDATA\pip\cache -Recurse -Filter 'cudnn64_8.dll' -ErrorAction SilentlyContinue | ForEach-Object {echo $_.FullName; $found = $true}; if (-not $found) {exit 1}}" + + if %errorlevel% neq 0 ( + echo Error: Could not find or download cuDNN 8.x DLLs + echo. + echo Please manually download cuDNN 8.9.7 from NVIDIA: + echo https://developer.nvidia.com/rdp/cudnn-archive + echo. + cd .. + deactivate + pause + exit /b 1 + ) +) + +echo. +echo Extracting cuDNN 8.x DLLs... +if exist cudnn.zip ( + powershell -Command "Expand-Archive -Path 'cudnn.zip' -DestinationPath '.' -Force" +) + +echo. +echo Copying cuDNN 8.x DLLs to PyTorch installation... +if exist "cudnn-windows-x86_64-8.9.7.29_cuda11-archive\bin\cudnn*.dll" ( + copy /Y "cudnn-windows-x86_64-8.9.7.29_cuda11-archive\bin\cudnn*.dll" "%VIRTUAL_ENV%\Lib\site-packages\torch\lib\" + echo. + echo Successfully copied cuDNN 8.x DLLs! + echo Listing copied files: + dir "%VIRTUAL_ENV%\Lib\site-packages\torch\lib\cudnn*.dll" /b +) else ( + echo Error: Could not find cuDNN DLLs in extracted files + echo Expected path: cudnn-windows-x86_64-8.9.7.29_cuda11-archive\bin\ + cd .. + deactivate + pause + exit /b 1 +) + +echo. 
+echo Cleaning up temporary files... +cd .. +rmdir /s /q temp_cudnn + +echo Deactivating virtual environment... +deactivate + +echo. +echo ======================================== +echo cuDNN 8.x Fix completed successfully! +echo ======================================== +echo. +echo You can now run UltraSinger without cuDNN errors. +echo. +pause + diff --git a/pyproject.toml b/pyproject.toml index a2c5dee3..cd7a328d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,21 +1,58 @@ [build-system] -requires = ["setuptools", "setuptools-scm"] -build-backend = "setuptools.build_meta" +requires = ["hatchling", "setuptools", "wheel", "setuptools_scm"] +build-backend = "hatchling.build" [project] name = "UltraSinger" -dynamic = ["dependencies", "optional-dependencies"] -requires-python = "==3.10" - -[tool.setuptools.dynamic] -dependencies = { file = ["requirements.in"] } -optional-dependencies.test = { file = ["requirements-test.txt"] } +version = "0.0.13.dev14" +description = "A tool to create UltraStar karaoke files from audio files" +requires-python = ">=3.12" +dependencies = [ + "setuptools", + "scipy", + "whisperx", + "num2words", + "inputimeout", + "langcodes", + "language_data", + "packaging", + "librosa>=0.10.2", + "numba>=0.59.0", + "swift-f0", + "pydub", + "demucs", + "ffmpeg-python", + "matplotlib", + "musicbrainzngs", + "python-Levenshtein", + "pretty-midi", + "unidecode", + "pyhyphen", + "tqdm", + "yt-dlp", + "music21", + "dataclasses", + "dataclasses-json", + "torch==2.8.0", + "torchaudio==2.8.0", + "torchvision==0.23.0", +] [project.optional-dependencies] -dev = ["pytest"] +windows = [] +linux = [] +macos = [] +dev = ["pytest", "isort", "black", "pylint"] + +[dependency-groups] +dev = ["pytest", "isort", "black", "pylint"] + +[tool.hatch.build.targets.wheel] +packages = ["src"] [tool.isort] profile = "black" + [tool.pytest.ini_options] pythonpath = [ "src", diff --git a/pytest/modules/Pitcher/test_pitcher.py b/pytest/modules/Pitcher/test_pitcher.py index 
e6239860..3413fe16 100644 --- a/pytest/modules/Pitcher/test_pitcher.py +++ b/pytest/modules/Pitcher/test_pitcher.py @@ -18,7 +18,7 @@ def test_get_pitch_with_crepe_file(self): test_output = root_dir + "/test_output" # Act - pitched_data = test_subject.get_pitch_with_crepe_file(test_file_abs_path, 'full', device="cuda") + pitched_data = test_subject.get_pitch_with_file(test_file_abs_path) # test_subject.get_pitch_with_crepe_file(test_file_abs_path, 'full', 'cpu', batch_size=1024) plot(pitched_data, test_output, title="pitching test") print("done") diff --git a/pytest/modules/UltraStar/test_ultrastar_parser.py b/pytest/modules/UltraStar/test_ultrastar_parser.py index 3bc20a0f..15c4b06e 100644 --- a/pytest/modules/UltraStar/test_ultrastar_parser.py +++ b/pytest/modules/UltraStar/test_ultrastar_parser.py @@ -29,7 +29,8 @@ def test_parse_ultrastar_txt(self, mock_create_folder, mock_dirname, mock_parse) self.assertEqual(result, ("Test Artist - Test Title", os.path.join("path", "to", "output", "Test Artist - Test Title"), os.path.join("path", "to", "input", "test.mp3"), - mock_parse.return_value)) + mock_parse.return_value, + "mp3")) # mock_parse.assert_called_once() mock_dirname.assert_called_once() diff --git a/pytest/modules/UltraStar/test_ultrastar_writer.py b/pytest/modules/UltraStar/test_ultrastar_writer.py index 51a94d95..2d4cc52b 100644 --- a/pytest/modules/UltraStar/test_ultrastar_writer.py +++ b/pytest/modules/UltraStar/test_ultrastar_writer.py @@ -104,26 +104,26 @@ def act_and_assert(self, bpm, default_ultrastar_class, expected_calls_default_va def default_values(default_ultrastar_class, ver): expected_calls = [] if version.parse(ver) >= version.parse("1.0.0"): - expected_calls.append(f"#{UltrastarTxtTag.VERSION}:{default_ultrastar_class.version}\n") + expected_calls.append(f"#{UltrastarTxtTag.VERSION.value}:{default_ultrastar_class.version}\n") expected_calls += [ - f"#{UltrastarTxtTag.ARTIST}:{default_ultrastar_class.artist}\n", - 
f"#{UltrastarTxtTag.TITLE}:{default_ultrastar_class.title}\n", - f"#{UltrastarTxtTag.MP3}:{default_ultrastar_class.mp3}\n" + f"#{UltrastarTxtTag.ARTIST.value}:{default_ultrastar_class.artist}\n", + f"#{UltrastarTxtTag.TITLE.value}:{default_ultrastar_class.title}\n", + f"#{UltrastarTxtTag.MP3.value}:{default_ultrastar_class.mp3}\n" ] if version.parse(ver) >= version.parse("1.1.0"): - expected_calls += [f"#{UltrastarTxtTag.AUDIO}:{default_ultrastar_class.audio}\n"] + expected_calls += [f"#{UltrastarTxtTag.AUDIO.value}:{default_ultrastar_class.audio}\n"] if default_ultrastar_class.video is not None: expected_calls += [ - f"#{UltrastarTxtTag.VIDEO}:{default_ultrastar_class.video}\n" + f"#{UltrastarTxtTag.VIDEO.value}:{default_ultrastar_class.video}\n" ] if version.parse(ver) >= version.parse("1.2.0"): if default_ultrastar_class.videoUrl is not None: - expected_calls += [f"#{UltrastarTxtTag.VIDEOURL}:{default_ultrastar_class.videoUrl}\n"] + expected_calls += [f"#{UltrastarTxtTag.VIDEOURL.value}:{default_ultrastar_class.videoUrl}\n"] expected_calls += [ - f"#{UltrastarTxtTag.BPM}:390.0\n", - f"#{UltrastarTxtTag.GAP}:500\n", - f"#{UltrastarTxtTag.CREATOR}:{default_ultrastar_class.creator}\n", - f"#{UltrastarTxtTag.COMMENT}:{default_ultrastar_class.comment}\n", + f"#{UltrastarTxtTag.BPM.value}:390.0\n", + f"#{UltrastarTxtTag.GAP.value}:500\n", + f"#{UltrastarTxtTag.CREATOR.value}:{default_ultrastar_class.creator}\n", + f"#{UltrastarTxtTag.COMMENT.value}:{default_ultrastar_class.comment}\n", ": 0 52 1 UltraSinger \n", ": 65 39 2 is \n", ": 130 52 3 cool! 
\n", @@ -136,26 +136,27 @@ def default_values(default_ultrastar_class, ver): def full_values(default_ultrastar_class, ver): expected_calls = [] if version.parse(ver) >= version.parse("1.0.0"): - expected_calls.append(f"#{UltrastarTxtTag.VERSION}:{default_ultrastar_class.version}\n") - expected_calls.append(f"#{UltrastarTxtTag.ARTIST}:{default_ultrastar_class.artist}\n") - expected_calls.append(f"#{UltrastarTxtTag.TITLE}:{default_ultrastar_class.title}\n") - expected_calls.append(f"#{UltrastarTxtTag.YEAR}:{default_ultrastar_class.year}\n") - expected_calls.append(f"#{UltrastarTxtTag.LANGUAGE}:German\n") - expected_calls.append(f"#{UltrastarTxtTag.GENRE}:{default_ultrastar_class.genre}\n") - expected_calls.append(f"#{UltrastarTxtTag.COVER}:{default_ultrastar_class.cover}\n") - expected_calls.append(f"#{UltrastarTxtTag.MP3}:{default_ultrastar_class.mp3}\n") + expected_calls.append(f"#{UltrastarTxtTag.VERSION.value}:{default_ultrastar_class.version}\n") + expected_calls.append(f"#{UltrastarTxtTag.ARTIST.value}:{default_ultrastar_class.artist}\n") + expected_calls.append(f"#{UltrastarTxtTag.TITLE.value}:{default_ultrastar_class.title}\n") + expected_calls.append(f"#{UltrastarTxtTag.YEAR.value}:{default_ultrastar_class.year}\n") + expected_calls.append(f"#{UltrastarTxtTag.LANGUAGE.value}:German\n") + expected_calls.append(f"#{UltrastarTxtTag.GENRE.value}:{default_ultrastar_class.genre}\n") + expected_calls.append(f"#{UltrastarTxtTag.COVER.value}:{default_ultrastar_class.cover}\n") + expected_calls.append(f"#{UltrastarTxtTag.MP3.value}:{default_ultrastar_class.mp3}\n") if version.parse(ver) >= version.parse("1.1.0"): - expected_calls.append(f"#{UltrastarTxtTag.AUDIO}:{default_ultrastar_class.audio}\n") - expected_calls.append(f"#{UltrastarTxtTag.VOCALS}:{default_ultrastar_class.vocals}\n") - expected_calls.append(f"#{UltrastarTxtTag.INSTRUMENTAL}:{default_ultrastar_class.instrumental}\n") - expected_calls.append(f"#{UltrastarTxtTag.TAGS}:{default_ultrastar_class.tags}\n") 
- expected_calls.append(f"#{UltrastarTxtTag.VIDEO}:{default_ultrastar_class.video}\n") + expected_calls.append(f"#{UltrastarTxtTag.AUDIO.value}:{default_ultrastar_class.audio}\n") + expected_calls.append(f"#{UltrastarTxtTag.VOCALS.value}:{default_ultrastar_class.vocals}\n") + expected_calls.append(f"#{UltrastarTxtTag.INSTRUMENTAL.value}:{default_ultrastar_class.instrumental}\n") + expected_calls.append(f"#{UltrastarTxtTag.VIDEO.value}:{default_ultrastar_class.video}\n") if version.parse(ver) >= version.parse("1.2.0"): - expected_calls.append(f"#{UltrastarTxtTag.VIDEOURL}:{default_ultrastar_class.videoUrl}\n") - expected_calls.append(f"#{UltrastarTxtTag.BPM}:390.0\n") - expected_calls.append(f"#{UltrastarTxtTag.GAP}:500\n") - expected_calls.append(f"#{UltrastarTxtTag.CREATOR}:{default_ultrastar_class.creator}\n") - expected_calls.append(f"#{UltrastarTxtTag.COMMENT}:{default_ultrastar_class.comment}\n") + expected_calls.append(f"#{UltrastarTxtTag.VIDEOURL.value}:{default_ultrastar_class.videoUrl}\n") + expected_calls.append(f"#{UltrastarTxtTag.BPM.value}:390.0\n") + expected_calls.append(f"#{UltrastarTxtTag.GAP.value}:500\n") + if version.parse(ver) >= version.parse("1.1.0"): + expected_calls.append(f"#{UltrastarTxtTag.TAGS.value}:{default_ultrastar_class.tags}\n") + expected_calls.append(f"#{UltrastarTxtTag.CREATOR.value}:{default_ultrastar_class.creator}\n") + expected_calls.append(f"#{UltrastarTxtTag.COMMENT.value}:{default_ultrastar_class.comment}\n") expected_calls.append(": 0 52 1 UltraSinger \n") expected_calls.append(": 65 39 2 is \n") expected_calls.append(": 130 52 3 cool! 
\n") diff --git a/requirements-linux.txt b/requirements-linux.txt deleted file mode 100644 index 6ef50721..00000000 --- a/requirements-linux.txt +++ /dev/null @@ -1,648 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --output-file=requirements-linux.txt -# -absl-py==2.3.0 - # via - # keras - # tensorboard - # tensorflow -aiohappyeyeballs==2.6.1 - # via aiohttp -aiohttp==3.12.9 - # via fsspec -aiosignal==1.3.2 - # via aiohttp -alembic==1.16.1 - # via optuna -antlr4-python3-runtime==4.9.3 - # via omegaconf -appdirs==1.4.4 - # via pyhyphen -asteroid-filterbanks==0.4.0 - # via pyannote-audio -astroid==3.3.10 - # via pylint -astunparse==1.6.3 - # via tensorflow -async-timeout==5.0.1 - # via aiohttp -attrs==25.3.0 - # via aiohttp -audioread==3.0.1 - # via librosa -av==14.4.0 - # via faster-whisper -black==25.1.0 - # via -r requirements.in -certifi==2025.4.26 - # via requests -cffi==1.17.1 - # via soundfile -chardet==5.2.0 - # via music21 -charset-normalizer==3.4.2 - # via requests -click==8.2.1 - # via - # black - # nltk - # typer -cloudpickle==3.1.1 - # via submitit -colorama==0.4.6 - # via - # click - # colorlog - # pylint - # pytest - # tqdm -coloredlogs==15.0.1 - # via onnxruntime -colorlog==6.9.0 - # via optuna -contourpy==1.3.2 - # via matplotlib -crepe==0.0.16 - # via -r requirements.in -ctranslate2==4.4.0 - # via - # faster-whisper - # whisperx -cycler==0.12.1 - # via matplotlib -dataclasses==0.6 - # via -r requirements.in -dataclasses-json==0.6.7 - # via -r requirements.in -decorator==5.2.1 - # via librosa -demucs==4.0.1 - # via -r requirements.in -dill==0.4.0 - # via pylint -docopt==0.6.2 - # via - # num2words - # pyannote-metrics - # pyannote-pipeline -dora-search==0.1.12 - # via demucs -einops==0.8.1 - # via - # demucs - # pyannote-audio -exceptiongroup==1.3.0 - # via pytest -faster-whisper==1.1.0 - # via whisperx -ffmpeg-python==0.2.0 - # via -r requirements.in -filelock==3.18.0 - # via 
- # huggingface-hub - # pyannote-pipeline - # torch - # transformers -flatbuffers==25.2.10 - # via - # onnxruntime - # tensorflow -fonttools==4.58.1 - # via matplotlib -frozenlist==1.6.2 - # via - # aiohttp - # aiosignal -fsspec[http]==2025.5.1 - # via - # huggingface-hub - # lightning - # pytorch-lightning - # torch -future==1.0.0 - # via ffmpeg-python -gast==0.6.0 - # via tensorflow -google-pasta==0.2.0 - # via tensorflow -greenlet==3.2.3 - # via sqlalchemy -grpcio==1.72.1 - # via - # tensorboard - # tensorflow -h5py==3.13.0 - # via - # crepe - # keras - # tensorflow -hmmlearn==0.3.3 - # via crepe -huggingface-hub==0.32.4 - # via - # faster-whisper - # pyannote-audio - # speechbrain - # tokenizers - # transformers -humanfriendly==10.0 - # via coloredlogs -hyperpyyaml==1.2.2 - # via speechbrain -idna==3.10 - # via - # requests - # yarl -imageio==2.37.0 - # via crepe -iniconfig==2.1.0 - # via pytest -inputimeout==1.0.4 - # via -r requirements.in -isort==6.0.1 - # via - # -r requirements.in - # pylint -jinja2==3.1.6 - # via torch -joblib==1.5.1 - # via - # librosa - # music21 - # nltk - # scikit-learn - # speechbrain -jsonpickle==4.1.1 - # via music21 -julius==0.2.7 - # via - # demucs - # torch-audiomentations -keras==3.10.0 - # via tensorflow -kiwisolver==1.4.8 - # via matplotlib -lameenc==1.8.1 - # via demucs -langcodes==3.5.0 - # via -r requirements.in -language-data==1.3.0 - # via langcodes -levenshtein==0.27.1 - # via python-levenshtein -libclang==18.1.1 - # via tensorflow -librosa==0.9.2 - # via -r requirements.in -lightning==2.5.1.post0 - # via pyannote-audio -lightning-utilities==0.14.3 - # via - # lightning - # pytorch-lightning - # torchmetrics -llvmlite==0.44.0 - # via numba -mako==1.3.10 - # via alembic -marisa-trie==1.2.1 - # via language-data -markdown==3.8 - # via tensorboard -markdown-it-py==3.0.0 - # via rich -markupsafe==3.0.2 - # via - # jinja2 - # mako - # werkzeug -marshmallow==3.26.1 - # via dataclasses-json -matplotlib==3.10.3 - # via - # -r 
requirements.in - # crepe - # music21 - # pyannote-metrics -mccabe==0.7.0 - # via pylint -mdurl==0.1.2 - # via markdown-it-py -mido==1.3.3 - # via pretty-midi -ml-dtypes==0.5.1 - # via - # keras - # tensorflow -more-itertools==10.7.0 - # via music21 -mpmath==1.3.0 - # via sympy -multidict==6.4.4 - # via - # aiohttp - # yarl -music21==9.7.0 - # via -r requirements.in -musicbrainzngs==0.7.1 - # via -r requirements.in -mypy-extensions==1.1.0 - # via - # black - # typing-inspect -namex==0.1.0 - # via keras -networkx==3.4.2 - # via torch -nltk==3.9.1 - # via whisperx -num2words==0.5.14 - # via -r requirements.in -numba==0.61.2 - # via - # librosa - # resampy -numpy==1.26.4 - # via - # -r requirements.in - # asteroid-filterbanks - # contourpy - # crepe - # ctranslate2 - # h5py - # hmmlearn - # imageio - # keras - # librosa - # matplotlib - # ml-dtypes - # music21 - # numba - # onnxruntime - # openunmix - # optuna - # pandas - # pretty-midi - # pyannote-core - # pyannote-metrics - # pytorch-metric-learning - # resampy - # scikit-learn - # scipy - # soundfile - # speechbrain - # tensorboard - # tensorboardx - # tensorflow - # torchmetrics - # transformers -omegaconf==2.3.0 - # via - # dora-search - # pyannote-audio -onnxruntime==1.22.0 - # via faster-whisper -openunmix==1.3.0 - # via demucs -opt-einsum==3.4.0 - # via tensorflow -optree==0.16.0 - # via keras -optuna==4.3.0 - # via pyannote-pipeline -packaging==24.2 - # via - # -r requirements.in - # black - # huggingface-hub - # keras - # librosa - # lightning - # lightning-utilities - # marshmallow - # matplotlib - # mido - # onnxruntime - # optuna - # pooch - # pytest - # pytorch-lightning - # speechbrain - # tensorboard - # tensorboardx - # tensorflow - # torch-pitch-shift - # torchmetrics - # transformers -pandas==2.3.0 - # via - # pyannote-database - # pyannote-metrics - # whisperx -pathspec==0.12.1 - # via black -pillow==11.2.1 - # via - # imageio - # matplotlib -platformdirs==4.3.8 - # via - # black - # pooch - # 
pylint -pluggy==1.6.0 - # via pytest -pooch==1.8.2 - # via librosa -pretty-midi==0.2.10 - # via -r requirements.in -primepy==1.3 - # via torch-pitch-shift -propcache==0.3.1 - # via - # aiohttp - # yarl -protobuf==5.29.5 - # via - # onnxruntime - # tensorboard - # tensorboardx - # tensorflow -pyannote-audio==3.3.2 - # via whisperx -pyannote-core==5.0.0 - # via - # pyannote-audio - # pyannote-database - # pyannote-metrics - # pyannote-pipeline -pyannote-database==5.1.3 - # via - # pyannote-audio - # pyannote-metrics - # pyannote-pipeline -pyannote-metrics==3.2.1 - # via pyannote-audio -pyannote-pipeline==3.0.1 - # via pyannote-audio -pycparser==2.22 - # via cffi -pydub==0.25.1 - # via -r requirements.in -pygments==2.19.1 - # via - # pytest - # rich -pyhyphen==4.0.4 - # via -r requirements.in -pylint==3.3.7 - # via -r requirements.in -pyparsing==3.2.3 - # via matplotlib -pyreadline3==3.5.4 - # via humanfriendly -pytest==8.4.0 - # via -r requirements.in -python-dateutil==2.9.0.post0 - # via - # matplotlib - # pandas -python-levenshtein==0.27.1 - # via -r requirements.in -pytorch-lightning==2.5.1.post0 - # via lightning -pytorch-metric-learning==2.8.1 - # via pyannote-audio -pytz==2025.2 - # via pandas -pyyaml==6.0.2 - # via - # ctranslate2 - # demucs - # huggingface-hub - # hyperpyyaml - # lightning - # omegaconf - # optuna - # pyannote-database - # pyannote-pipeline - # pytorch-lightning - # transformers -rapidfuzz==3.13.0 - # via levenshtein -regex==2024.11.6 - # via - # nltk - # transformers -requests==2.32.3 - # via - # huggingface-hub - # music21 - # pooch - # pyhyphen - # tensorflow - # transformers -resampy==0.4.3 - # via - # crepe - # librosa -retrying==1.3.4 - # via dora-search -rich==14.0.0 - # via - # keras - # pyannote-audio - # typer -ruamel-yaml==0.18.12 - # via hyperpyyaml -ruamel-yaml-clib==0.2.12 - # via ruamel-yaml -safetensors==0.5.3 - # via transformers -scikit-learn==1.6.1 - # via - # crepe - # hmmlearn - # librosa - # pyannote-metrics - # 
pyannote-pipeline - # pytorch-metric-learning -scipy==1.15.3 - # via - # -r requirements.in - # crepe - # hmmlearn - # librosa - # pyannote-core - # pyannote-metrics - # scikit-learn - # speechbrain -semver==3.0.4 - # via pyannote-audio -sentencepiece==0.2.0 - # via speechbrain -shellingham==1.5.4 - # via typer -six==1.17.0 - # via - # astunparse - # google-pasta - # pretty-midi - # python-dateutil - # retrying - # tensorboard - # tensorflow -sortedcontainers==2.4.0 - # via pyannote-core -soundfile==0.13.1 - # via - # librosa - # pyannote-audio -speechbrain==1.0.3 - # via pyannote-audio -sqlalchemy==2.0.41 - # via - # alembic - # optuna -submitit==1.5.3 - # via dora-search -sympy==1.14.0 - # via - # onnxruntime - # pyannote-metrics - # torch -tabulate==0.9.0 - # via pyannote-metrics -tensorboard==2.19.0 - # via tensorflow -tensorboard-data-server==0.7.2 - # via tensorboard -tensorboardx==2.6.2.2 - # via pyannote-audio -tensorflow==2.19.0 - # via -r requirements.in -tensorflow-io-gcs-filesystem==0.31.0 - # via tensorflow -termcolor==3.1.0 - # via tensorflow -threadpoolctl==3.6.0 - # via scikit-learn -tokenizers==0.21.1 - # via - # faster-whisper - # transformers -tomli==2.2.1 - # via - # alembic - # black - # pylint - # pytest -tomlkit==0.13.3 - # via pylint -torch==2.7.1 - # via - # asteroid-filterbanks - # demucs - # dora-search - # julius - # lightning - # openunmix - # pyannote-audio - # pytorch-lightning - # pytorch-metric-learning - # speechbrain - # torch-audiomentations - # torch-pitch-shift - # torchaudio - # torchmetrics - # whisperx -torch-audiomentations==0.12.0 - # via pyannote-audio -torch-pitch-shift==1.2.5 - # via torch-audiomentations -torchaudio==2.7.1 - # via - # demucs - # openunmix - # pyannote-audio - # speechbrain - # torch-audiomentations - # torch-pitch-shift - # whisperx -torchmetrics==1.7.2 - # via - # lightning - # pyannote-audio - # pytorch-lightning -tqdm==4.67.1 - # via - # -r requirements.in - # demucs - # faster-whisper - # 
huggingface-hub - # lightning - # nltk - # openunmix - # optuna - # pyannote-pipeline - # pytorch-lightning - # pytorch-metric-learning - # speechbrain - # transformers -transformers==4.52.4 - # via whisperx -treetable==0.2.5 - # via dora-search -typer==0.16.0 - # via pyannote-database -typing-extensions==4.14.0 - # via - # alembic - # asteroid-filterbanks - # astroid - # black - # exceptiongroup - # huggingface-hub - # lightning - # lightning-utilities - # multidict - # optree - # pyannote-core - # pytorch-lightning - # rich - # sqlalchemy - # submitit - # tensorflow - # torch - # typer - # typing-inspect -typing-inspect==0.9.0 - # via dataclasses-json -tzdata==2025.2 - # via pandas -unidecode==1.4.0 - # via -r requirements.in -urllib3==2.4.0 - # via requests -webcolors==24.11.1 - # via music21 -werkzeug==3.1.3 - # via tensorboard -wheel==0.45.1 - # via - # astunparse - # pyhyphen -whisperx==3.3.1 - # via -r requirements.in -wrapt==1.17.2 - # via tensorflow -yarl==1.20.0 - # via aiohttp -yt-dlp==2025.5.22 - # via -r requirements.in - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements-macos.txt b/requirements-macos.txt deleted file mode 100644 index 678ba553..00000000 --- a/requirements-macos.txt +++ /dev/null @@ -1,959 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --output-file=requirements-macos.txt requirements-linux.txt requirements.in -# -# NOTE: The linux file was manually edited to change tensorflow-io-gcs-filesystem to 0.37.1 -# -absl-py==2.3.0 - # via - # -r requirements-linux.txt - # keras - # tensorboard - # tensorflow -aiohappyeyeballs==2.6.1 - # via - # -r requirements-linux.txt - # aiohttp -aiohttp==3.12.9 - # via - # -r requirements-linux.txt - # fsspec -aiosignal==1.3.2 - # via - # -r requirements-linux.txt - # aiohttp -alembic==1.16.1 - # via - # -r requirements-linux.txt - # optuna 
-antlr4-python3-runtime==4.9.3 - # via - # -r requirements-linux.txt - # omegaconf -appdirs==1.4.4 - # via - # -r requirements-linux.txt - # pyhyphen -asteroid-filterbanks==0.4.0 - # via - # -r requirements-linux.txt - # pyannote-audio -astroid==3.3.10 - # via - # -r requirements-linux.txt - # pylint -astunparse==1.6.3 - # via - # -r requirements-linux.txt - # tensorflow -async-timeout==5.0.1 - # via - # -r requirements-linux.txt - # aiohttp -attrs==25.3.0 - # via - # -r requirements-linux.txt - # aiohttp -audioread==3.0.1 - # via - # -r requirements-linux.txt - # librosa -av==14.4.0 - # via - # -r requirements-linux.txt - # faster-whisper -black==25.1.0 - # via - # -r requirements-linux.txt - # -r requirements.in -certifi==2025.4.26 - # via - # -r requirements-linux.txt - # requests -cffi==1.17.1 - # via - # -r requirements-linux.txt - # soundfile -chardet==5.2.0 - # via - # -r requirements-linux.txt - # music21 -charset-normalizer==3.4.2 - # via - # -r requirements-linux.txt - # requests -click==8.2.1 - # via - # -r requirements-linux.txt - # black - # nltk - # typer -cloudpickle==3.1.1 - # via - # -r requirements-linux.txt - # submitit -colorama==0.4.6 - # via -r requirements-linux.txt -coloredlogs==15.0.1 - # via - # -r requirements-linux.txt - # onnxruntime -colorlog==6.9.0 - # via - # -r requirements-linux.txt - # optuna -contourpy==1.3.2 - # via - # -r requirements-linux.txt - # matplotlib -crepe==0.0.16 - # via - # -r requirements-linux.txt - # -r requirements.in -ctranslate2==4.4.0 - # via - # -r requirements-linux.txt - # faster-whisper - # whisperx -cycler==0.12.1 - # via - # -r requirements-linux.txt - # matplotlib -dataclasses==0.6 - # via - # -r requirements-linux.txt - # -r requirements.in -dataclasses-json==0.6.7 - # via - # -r requirements-linux.txt - # -r requirements.in -decorator==5.2.1 - # via - # -r requirements-linux.txt - # librosa -demucs==4.0.1 - # via - # -r requirements-linux.txt - # -r requirements.in -dill==0.4.0 - # via - # -r 
requirements-linux.txt - # pylint -docopt==0.6.2 - # via - # -r requirements-linux.txt - # num2words - # pyannote-metrics - # pyannote-pipeline -dora-search==0.1.12 - # via - # -r requirements-linux.txt - # demucs -einops==0.8.1 - # via - # -r requirements-linux.txt - # demucs - # pyannote-audio -exceptiongroup==1.3.0 - # via - # -r requirements-linux.txt - # pytest -faster-whisper==1.1.0 - # via - # -r requirements-linux.txt - # whisperx -ffmpeg-python==0.2.0 - # via - # -r requirements-linux.txt - # -r requirements.in -filelock==3.18.0 - # via - # -r requirements-linux.txt - # huggingface-hub - # pyannote-pipeline - # torch - # transformers -flatbuffers==25.2.10 - # via - # -r requirements-linux.txt - # onnxruntime - # tensorflow -fonttools==4.58.1 - # via - # -r requirements-linux.txt - # matplotlib -frozenlist==1.6.2 - # via - # -r requirements-linux.txt - # aiohttp - # aiosignal -fsspec[http]==2025.5.1 - # via - # -r requirements-linux.txt - # huggingface-hub - # lightning - # pytorch-lightning - # torch -future==1.0.0 - # via - # -r requirements-linux.txt - # ffmpeg-python -gast==0.6.0 - # via - # -r requirements-linux.txt - # tensorflow -google-pasta==0.2.0 - # via - # -r requirements-linux.txt - # tensorflow -greenlet==3.2.3 - # via -r requirements-linux.txt -grpcio==1.72.1 - # via - # -r requirements-linux.txt - # tensorboard - # tensorflow -h5py==3.13.0 - # via - # -r requirements-linux.txt - # crepe - # keras - # tensorflow -hf-xet==1.2.0 - # via huggingface-hub -hmmlearn==0.3.3 - # via - # -r requirements-linux.txt - # crepe -huggingface-hub==0.32.4 - # via - # -r requirements-linux.txt - # faster-whisper - # pyannote-audio - # speechbrain - # tokenizers - # transformers -humanfriendly==10.0 - # via - # -r requirements-linux.txt - # coloredlogs -hyperpyyaml==1.2.2 - # via - # -r requirements-linux.txt - # speechbrain -idna==3.10 - # via - # -r requirements-linux.txt - # requests - # yarl -imageio==2.37.0 - # via - # -r requirements-linux.txt - # crepe 
-iniconfig==2.1.0 - # via - # -r requirements-linux.txt - # pytest -inputimeout==1.0.4 - # via - # -r requirements-linux.txt - # -r requirements.in -isort==6.0.1 - # via - # -r requirements-linux.txt - # -r requirements.in - # pylint -jinja2==3.1.6 - # via - # -r requirements-linux.txt - # torch -joblib==1.5.1 - # via - # -r requirements-linux.txt - # librosa - # music21 - # nltk - # scikit-learn - # speechbrain -jsonpickle==4.1.1 - # via - # -r requirements-linux.txt - # music21 -julius==0.2.7 - # via - # -r requirements-linux.txt - # demucs - # torch-audiomentations -keras==3.10.0 - # via - # -r requirements-linux.txt - # tensorflow -kiwisolver==1.4.8 - # via - # -r requirements-linux.txt - # matplotlib -lameenc==1.8.1 - # via - # -r requirements-linux.txt - # demucs -langcodes==3.5.0 - # via - # -r requirements-linux.txt - # -r requirements.in -language-data==1.3.0 - # via - # -r requirements-linux.txt - # langcodes -levenshtein==0.27.1 - # via - # -r requirements-linux.txt - # python-levenshtein -libclang==18.1.1 - # via - # -r requirements-linux.txt - # tensorflow -librosa==0.9.2 - # via - # -r requirements-linux.txt - # -r requirements.in -lightning==2.5.1.post0 - # via - # -r requirements-linux.txt - # pyannote-audio -lightning-utilities==0.14.3 - # via - # -r requirements-linux.txt - # lightning - # pytorch-lightning - # torchmetrics -llvmlite==0.44.0 - # via - # -r requirements-linux.txt - # numba -mako==1.3.10 - # via - # -r requirements-linux.txt - # alembic -marisa-trie==1.2.1 - # via - # -r requirements-linux.txt - # language-data -markdown==3.8 - # via - # -r requirements-linux.txt - # tensorboard -markdown-it-py==3.0.0 - # via - # -r requirements-linux.txt - # rich -markupsafe==3.0.2 - # via - # -r requirements-linux.txt - # jinja2 - # mako - # werkzeug -marshmallow==3.26.1 - # via - # -r requirements-linux.txt - # dataclasses-json -matplotlib==3.10.3 - # via - # -r requirements-linux.txt - # -r requirements.in - # crepe - # music21 - # 
pyannote-metrics -mccabe==0.7.0 - # via - # -r requirements-linux.txt - # pylint -mdurl==0.1.2 - # via - # -r requirements-linux.txt - # markdown-it-py -mido==1.3.3 - # via - # -r requirements-linux.txt - # pretty-midi -ml-dtypes==0.5.1 - # via - # -r requirements-linux.txt - # keras - # tensorflow -more-itertools==10.7.0 - # via - # -r requirements-linux.txt - # music21 -mpmath==1.3.0 - # via - # -r requirements-linux.txt - # sympy -multidict==6.4.4 - # via - # -r requirements-linux.txt - # aiohttp - # yarl -music21==9.7.0 - # via - # -r requirements-linux.txt - # -r requirements.in -musicbrainzngs==0.7.1 - # via - # -r requirements-linux.txt - # -r requirements.in -mypy-extensions==1.1.0 - # via - # -r requirements-linux.txt - # black - # typing-inspect -namex==0.1.0 - # via - # -r requirements-linux.txt - # keras -networkx==3.4.2 - # via - # -r requirements-linux.txt - # torch -nltk==3.9.1 - # via - # -r requirements-linux.txt - # whisperx -num2words==0.5.14 - # via - # -r requirements-linux.txt - # -r requirements.in -numba==0.61.2 - # via - # -r requirements-linux.txt - # librosa - # resampy -numpy==1.26.4 - # via - # -r requirements-linux.txt - # -r requirements.in - # asteroid-filterbanks - # contourpy - # crepe - # ctranslate2 - # h5py - # hmmlearn - # imageio - # keras - # librosa - # matplotlib - # ml-dtypes - # music21 - # numba - # onnxruntime - # openunmix - # optuna - # pandas - # pretty-midi - # pyannote-core - # pyannote-metrics - # pytorch-metric-learning - # resampy - # scikit-learn - # scipy - # soundfile - # speechbrain - # tensorboard - # tensorboardx - # tensorflow - # torchmetrics - # transformers -omegaconf==2.3.0 - # via - # -r requirements-linux.txt - # dora-search - # pyannote-audio -onnxruntime==1.22.0 - # via - # -r requirements-linux.txt - # faster-whisper -openunmix==1.3.0 - # via - # -r requirements-linux.txt - # demucs -opt-einsum==3.4.0 - # via - # -r requirements-linux.txt - # tensorflow -optree==0.16.0 - # via - # -r 
requirements-linux.txt - # keras -optuna==4.3.0 - # via - # -r requirements-linux.txt - # pyannote-pipeline -packaging==24.2 - # via - # -r requirements-linux.txt - # -r requirements.in - # black - # huggingface-hub - # keras - # librosa - # lightning - # lightning-utilities - # marshmallow - # matplotlib - # mido - # onnxruntime - # optuna - # pooch - # pytest - # pytorch-lightning - # speechbrain - # tensorboard - # tensorboardx - # tensorflow - # torch-pitch-shift - # torchmetrics - # transformers -pandas==2.3.0 - # via - # -r requirements-linux.txt - # pyannote-database - # pyannote-metrics - # whisperx -pathspec==0.12.1 - # via - # -r requirements-linux.txt - # black -pillow==11.2.1 - # via - # -r requirements-linux.txt - # imageio - # matplotlib -platformdirs==4.3.8 - # via - # -r requirements-linux.txt - # black - # pooch - # pylint -pluggy==1.6.0 - # via - # -r requirements-linux.txt - # pytest -pooch==1.8.2 - # via - # -r requirements-linux.txt - # librosa -pretty-midi==0.2.10 - # via - # -r requirements-linux.txt - # -r requirements.in -primepy==1.3 - # via - # -r requirements-linux.txt - # torch-pitch-shift -propcache==0.3.1 - # via - # -r requirements-linux.txt - # aiohttp - # yarl -protobuf==5.29.5 - # via - # -r requirements-linux.txt - # onnxruntime - # tensorboard - # tensorboardx - # tensorflow -pyannote-audio==3.3.2 - # via - # -r requirements-linux.txt - # whisperx -pyannote-core==5.0.0 - # via - # -r requirements-linux.txt - # pyannote-audio - # pyannote-database - # pyannote-metrics - # pyannote-pipeline -pyannote-database==5.1.3 - # via - # -r requirements-linux.txt - # pyannote-audio - # pyannote-metrics - # pyannote-pipeline -pyannote-metrics==3.2.1 - # via - # -r requirements-linux.txt - # pyannote-audio -pyannote-pipeline==3.0.1 - # via - # -r requirements-linux.txt - # pyannote-audio -pycparser==2.22 - # via - # -r requirements-linux.txt - # cffi -pydub==0.25.1 - # via - # -r requirements-linux.txt - # -r requirements.in -pygments==2.19.1 
- # via - # -r requirements-linux.txt - # pytest - # rich -pyhyphen==4.0.4 - # via - # -r requirements-linux.txt - # -r requirements.in -pylint==3.3.7 - # via - # -r requirements-linux.txt - # -r requirements.in -pyparsing==3.2.3 - # via - # -r requirements-linux.txt - # matplotlib -pyreadline3==3.5.4 - # via -r requirements-linux.txt -pytest==8.4.0 - # via - # -r requirements-linux.txt - # -r requirements.in -python-dateutil==2.9.0.post0 - # via - # -r requirements-linux.txt - # matplotlib - # pandas -python-levenshtein==0.27.1 - # via - # -r requirements-linux.txt - # -r requirements.in -pytorch-lightning==2.5.1.post0 - # via - # -r requirements-linux.txt - # lightning -pytorch-metric-learning==2.8.1 - # via - # -r requirements-linux.txt - # pyannote-audio -pytz==2025.2 - # via - # -r requirements-linux.txt - # pandas -pyyaml==6.0.2 - # via - # -r requirements-linux.txt - # ctranslate2 - # demucs - # huggingface-hub - # hyperpyyaml - # lightning - # omegaconf - # optuna - # pyannote-database - # pyannote-pipeline - # pytorch-lightning - # transformers -rapidfuzz==3.13.0 - # via - # -r requirements-linux.txt - # levenshtein -regex==2024.11.6 - # via - # -r requirements-linux.txt - # nltk - # transformers -requests==2.32.3 - # via - # -r requirements-linux.txt - # huggingface-hub - # music21 - # pooch - # pyhyphen - # tensorflow - # transformers -resampy==0.4.3 - # via - # -r requirements-linux.txt - # crepe - # librosa -retrying==1.3.4 - # via - # -r requirements-linux.txt - # dora-search -rich==14.0.0 - # via - # -r requirements-linux.txt - # keras - # pyannote-audio - # typer -ruamel-yaml==0.18.12 - # via - # -r requirements-linux.txt - # hyperpyyaml -ruamel-yaml-clib==0.2.12 - # via - # -r requirements-linux.txt - # ruamel-yaml -safetensors==0.5.3 - # via - # -r requirements-linux.txt - # transformers -scikit-learn==1.6.1 - # via - # -r requirements-linux.txt - # crepe - # hmmlearn - # librosa - # pyannote-metrics - # pyannote-pipeline - # 
pytorch-metric-learning -scipy==1.15.3 - # via - # -r requirements-linux.txt - # -r requirements.in - # crepe - # hmmlearn - # librosa - # pyannote-core - # pyannote-metrics - # scikit-learn - # speechbrain -semver==3.0.4 - # via - # -r requirements-linux.txt - # pyannote-audio -sentencepiece==0.2.0 - # via - # -r requirements-linux.txt - # speechbrain -shellingham==1.5.4 - # via - # -r requirements-linux.txt - # typer -six==1.17.0 - # via - # -r requirements-linux.txt - # astunparse - # google-pasta - # pretty-midi - # python-dateutil - # retrying - # tensorboard - # tensorflow -sortedcontainers==2.4.0 - # via - # -r requirements-linux.txt - # pyannote-core -soundfile==0.13.1 - # via - # -r requirements-linux.txt - # librosa - # pyannote-audio -speechbrain==1.0.3 - # via - # -r requirements-linux.txt - # pyannote-audio -sqlalchemy==2.0.41 - # via - # -r requirements-linux.txt - # alembic - # optuna -submitit==1.5.3 - # via - # -r requirements-linux.txt - # dora-search -sympy==1.14.0 - # via - # -r requirements-linux.txt - # onnxruntime - # pyannote-metrics - # torch -tabulate==0.9.0 - # via - # -r requirements-linux.txt - # pyannote-metrics -tensorboard==2.19.0 - # via - # -r requirements-linux.txt - # tensorflow -tensorboard-data-server==0.7.2 - # via - # -r requirements-linux.txt - # tensorboard -tensorboardx==2.6.2.2 - # via - # -r requirements-linux.txt - # pyannote-audio -tensorflow==2.19.0 - # via - # -r requirements-linux.txt - # -r requirements.in -tensorflow-io-gcs-filesystem==0.37.1 - # via - # -r requirements-linux.txt -- manually changed to 0.37.1 from 0.31.0 - # tensorflow -termcolor==3.1.0 - # via - # -r requirements-linux.txt - # tensorflow -threadpoolctl==3.6.0 - # via - # -r requirements-linux.txt - # scikit-learn -tokenizers==0.21.1 - # via - # -r requirements-linux.txt - # faster-whisper - # transformers -tomli==2.2.1 - # via - # -r requirements-linux.txt - # alembic - # black - # pylint - # pytest -tomlkit==0.13.3 - # via - # -r 
requirements-linux.txt - # pylint -torch==2.7.1 - # via - # -r requirements-linux.txt - # asteroid-filterbanks - # demucs - # dora-search - # julius - # lightning - # openunmix - # pyannote-audio - # pytorch-lightning - # pytorch-metric-learning - # speechbrain - # torch-audiomentations - # torch-pitch-shift - # torchaudio - # torchmetrics - # whisperx -torch-audiomentations==0.12.0 - # via - # -r requirements-linux.txt - # pyannote-audio -torch-pitch-shift==1.2.5 - # via - # -r requirements-linux.txt - # torch-audiomentations -torchaudio==2.7.1 - # via - # -r requirements-linux.txt - # demucs - # openunmix - # pyannote-audio - # speechbrain - # torch-audiomentations - # torch-pitch-shift - # whisperx -torchmetrics==1.7.2 - # via - # -r requirements-linux.txt - # lightning - # pyannote-audio - # pytorch-lightning -tqdm==4.67.1 - # via - # -r requirements-linux.txt - # -r requirements.in - # demucs - # faster-whisper - # huggingface-hub - # lightning - # nltk - # openunmix - # optuna - # pyannote-pipeline - # pytorch-lightning - # pytorch-metric-learning - # speechbrain - # transformers -transformers==4.52.4 - # via - # -r requirements-linux.txt - # whisperx -treetable==0.2.5 - # via - # -r requirements-linux.txt - # dora-search -typer==0.16.0 - # via - # -r requirements-linux.txt - # pyannote-database -typing-extensions==4.14.0 - # via - # -r requirements-linux.txt - # alembic - # asteroid-filterbanks - # astroid - # black - # exceptiongroup - # huggingface-hub - # lightning - # lightning-utilities - # multidict - # optree - # pyannote-core - # pytorch-lightning - # rich - # sqlalchemy - # submitit - # tensorflow - # torch - # typer - # typing-inspect -typing-inspect==0.9.0 - # via - # -r requirements-linux.txt - # dataclasses-json -tzdata==2025.2 - # via - # -r requirements-linux.txt - # pandas -unidecode==1.4.0 - # via - # -r requirements-linux.txt - # -r requirements.in -urllib3==2.4.0 - # via - # -r requirements-linux.txt - # requests -webcolors==24.11.1 - # 
via - # -r requirements-linux.txt - # music21 -werkzeug==3.1.3 - # via - # -r requirements-linux.txt - # tensorboard -wheel==0.45.1 - # via - # -r requirements-linux.txt - # astunparse - # pyhyphen -whisperx==3.3.1 - # via - # -r requirements-linux.txt - # -r requirements.in -wrapt==1.17.2 - # via - # -r requirements-linux.txt - # tensorflow -yarl==1.20.0 - # via - # -r requirements-linux.txt - # aiohttp -yt-dlp==2025.5.22 - # via - # -r requirements-linux.txt - # -r requirements.in - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements-windows.txt b/requirements-windows.txt deleted file mode 100644 index e615ebae..00000000 --- a/requirements-windows.txt +++ /dev/null @@ -1,661 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile -# -absl-py==2.3.0 - # via - # tensorboard - # tensorflow -aiohappyeyeballs==2.6.1 - # via aiohttp -aiohttp==3.12.9 - # via fsspec -aiosignal==1.3.2 - # via aiohttp -alembic==1.16.1 - # via optuna -antlr4-python3-runtime==4.9.3 - # via omegaconf -appdirs==1.4.4 - # via pyhyphen -asteroid-filterbanks==0.4.0 - # via pyannote-audio -astroid==3.3.10 - # via pylint -astunparse==1.6.3 - # via tensorflow -async-timeout==5.0.1 - # via aiohttp -attrs==25.3.0 - # via aiohttp -audioread==3.0.1 - # via librosa -av==14.4.0 - # via faster-whisper -black==25.1.0 - # via -r requirements.in -cachetools==5.5.2 - # via google-auth -certifi==2025.4.26 - # via requests -cffi==1.17.1 - # via soundfile -chardet==5.2.0 - # via music21 -charset-normalizer==3.4.2 - # via requests -click==8.2.1 - # via - # black - # nltk - # typer -cloudpickle==3.1.1 - # via submitit -colorama==0.4.6 - # via - # click - # colorlog - # pylint - # pytest - # tqdm -coloredlogs==15.0.1 - # via onnxruntime -colorlog==6.9.0 - # via optuna -contourpy==1.3.2 - # via matplotlib -crepe==0.0.16 - # via -r requirements.in -ctranslate2==4.4.0 - # via - # 
faster-whisper - # whisperx -cycler==0.12.1 - # via matplotlib -dataclasses==0.6 - # via -r requirements.in -dataclasses-json==0.6.7 - # via -r requirements.in -decorator==5.2.1 - # via librosa -demucs==4.0.1 - # via -r requirements.in -dill==0.4.0 - # via pylint -docopt==0.6.2 - # via - # num2words - # pyannote-metrics - # pyannote-pipeline -dora-search==0.1.12 - # via demucs -einops==0.8.1 - # via - # demucs - # pyannote-audio -exceptiongroup==1.3.0 - # via pytest -faster-whisper==1.1.0 - # via whisperx -ffmpeg-python==0.2.0 - # via -r requirements.in -filelock==3.18.0 - # via - # huggingface-hub - # pyannote-pipeline - # torch - # transformers -flatbuffers==25.2.10 - # via - # onnxruntime - # tensorflow -fonttools==4.58.1 - # via matplotlib -frozenlist==1.6.2 - # via - # aiohttp - # aiosignal -fsspec[http]==2025.5.1 - # via - # huggingface-hub - # lightning - # pytorch-lightning - # torch -future==1.0.0 - # via ffmpeg-python -gast==0.4.0 - # via tensorflow -google-auth==2.40.3 - # via - # google-auth-oauthlib - # tensorboard -google-auth-oauthlib==0.4.6 - # via tensorboard -google-pasta==0.2.0 - # via tensorflow -greenlet==3.2.3 - # via sqlalchemy -grpcio==1.72.1 - # via - # tensorboard - # tensorflow -h5py==3.13.0 - # via - # crepe - # tensorflow -hmmlearn==0.3.3 - # via crepe -huggingface-hub==0.32.4 - # via - # faster-whisper - # pyannote-audio - # speechbrain - # tokenizers - # transformers -humanfriendly==10.0 - # via coloredlogs -hyperpyyaml==1.2.2 - # via speechbrain -idna==3.10 - # via - # requests - # yarl -imageio==2.37.0 - # via crepe -iniconfig==2.1.0 - # via pytest -inputimeout==1.0.4 - # via -r requirements.in -isort==6.0.1 - # via - # -r requirements.in - # pylint -jinja2==3.1.6 - # via torch -joblib==1.5.1 - # via - # librosa - # music21 - # nltk - # scikit-learn - # speechbrain -jsonpickle==4.1.1 - # via music21 -julius==0.2.7 - # via - # demucs - # torch-audiomentations -keras==2.10.0 - # via tensorflow -keras-preprocessing==1.1.2 - # via 
tensorflow -kiwisolver==1.4.8 - # via matplotlib -lameenc==1.8.1 - # via demucs -langcodes==3.5.0 - # via -r requirements.in -language-data==1.3.0 - # via langcodes -levenshtein==0.27.1 - # via python-levenshtein -libclang==18.1.1 - # via tensorflow -librosa==0.9.2 - # via -r requirements.in -lightning==2.5.1.post0 - # via pyannote-audio -lightning-utilities==0.14.3 - # via - # lightning - # pytorch-lightning - # torchmetrics -llvmlite==0.44.0 - # via numba -mako==1.3.10 - # via alembic -marisa-trie==1.2.1 - # via language-data -markdown==3.8 - # via tensorboard -markdown-it-py==3.0.0 - # via rich -markupsafe==3.0.2 - # via - # jinja2 - # mako - # werkzeug -marshmallow==3.26.1 - # via dataclasses-json -matplotlib==3.10.3 - # via - # -r requirements.in - # crepe - # music21 - # pyannote-metrics -mccabe==0.7.0 - # via pylint -mdurl==0.1.2 - # via markdown-it-py -mido==1.3.3 - # via pretty-midi -more-itertools==10.7.0 - # via music21 -mpmath==1.3.0 - # via sympy -multidict==6.4.4 - # via - # aiohttp - # yarl -music21==9.7.0 - # via -r requirements.in -musicbrainzngs==0.7.1 - # via -r requirements.in -mypy-extensions==1.1.0 - # via - # black - # typing-inspect -networkx==3.4.2 - # via torch -nltk==3.9.1 - # via whisperx -num2words==0.5.14 - # via -r requirements.in -numba==0.61.2 - # via - # librosa - # resampy -numpy==1.26.4 - # via - # -r requirements.in - # asteroid-filterbanks - # contourpy - # crepe - # ctranslate2 - # h5py - # hmmlearn - # imageio - # keras-preprocessing - # librosa - # matplotlib - # music21 - # numba - # onnxruntime - # openunmix - # optuna - # pandas - # pretty-midi - # pyannote-core - # pyannote-metrics - # pytorch-metric-learning - # resampy - # scikit-learn - # scipy - # soundfile - # speechbrain - # tensorboard - # tensorboardx - # tensorflow - # torchmetrics - # transformers -oauthlib==3.2.2 - # via requests-oauthlib -omegaconf==2.3.0 - # via - # dora-search - # pyannote-audio -onnxruntime==1.22.0 - # via faster-whisper -openunmix==1.3.0 
- # via demucs -opt-einsum==3.4.0 - # via tensorflow -optuna==4.3.0 - # via pyannote-pipeline -packaging==24.2 - # via - # -r requirements.in - # black - # huggingface-hub - # librosa - # lightning - # lightning-utilities - # marshmallow - # matplotlib - # mido - # onnxruntime - # optuna - # pooch - # pytest - # pytorch-lightning - # speechbrain - # tensorboardx - # tensorflow - # torch-pitch-shift - # torchmetrics - # transformers -pandas==2.3.0 - # via - # pyannote-database - # pyannote-metrics - # whisperx -pathspec==0.12.1 - # via black -pillow==11.2.1 - # via - # imageio - # matplotlib -platformdirs==4.3.8 - # via - # black - # pooch - # pylint -pluggy==1.6.0 - # via pytest -pooch==1.8.2 - # via librosa -pretty-midi==0.2.10 - # via -r requirements.in -primepy==1.3 - # via torch-pitch-shift -propcache==0.3.1 - # via - # aiohttp - # yarl -protobuf==3.19.6 - # via - # onnxruntime - # tensorboard - # tensorboardx - # tensorflow -pyannote-audio==3.3.2 - # via whisperx -pyannote-core==5.0.0 - # via - # pyannote-audio - # pyannote-database - # pyannote-metrics - # pyannote-pipeline -pyannote-database==5.1.3 - # via - # pyannote-audio - # pyannote-metrics - # pyannote-pipeline -pyannote-metrics==3.2.1 - # via pyannote-audio -pyannote-pipeline==3.0.1 - # via pyannote-audio -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.2 - # via google-auth -pycparser==2.22 - # via cffi -pydub==0.25.1 - # via -r requirements.in -pygments==2.19.1 - # via - # pytest - # rich -pyhyphen==4.0.4 - # via -r requirements.in -pylint==3.3.7 - # via -r requirements.in -pyparsing==3.2.3 - # via matplotlib -pyreadline3==3.5.4 - # via humanfriendly -pytest==8.4.0 - # via -r requirements.in -python-dateutil==2.9.0.post0 - # via - # matplotlib - # pandas -python-levenshtein==0.27.1 - # via -r requirements.in -pytorch-lightning==2.5.1.post0 - # via lightning -pytorch-metric-learning==2.8.1 - # via pyannote-audio -pytz==2025.2 - # via pandas -pyyaml==6.0.2 - # via - # ctranslate2 
- # demucs - # huggingface-hub - # hyperpyyaml - # lightning - # omegaconf - # optuna - # pyannote-database - # pyannote-pipeline - # pytorch-lightning - # transformers -rapidfuzz==3.13.0 - # via levenshtein -regex==2024.11.6 - # via - # nltk - # transformers -requests==2.32.3 - # via - # huggingface-hub - # music21 - # pooch - # pyhyphen - # requests-oauthlib - # tensorboard - # transformers -requests-oauthlib==2.0.0 - # via google-auth-oauthlib -resampy==0.4.3 - # via - # crepe - # librosa -retrying==1.3.4 - # via dora-search -rich==14.0.0 - # via - # pyannote-audio - # typer -rsa==4.9.1 - # via google-auth -ruamel-yaml==0.18.12 - # via hyperpyyaml -ruamel-yaml-clib==0.2.12 - # via ruamel-yaml -safetensors==0.5.3 - # via transformers -scikit-learn==1.6.1 - # via - # crepe - # hmmlearn - # librosa - # pyannote-metrics - # pyannote-pipeline - # pytorch-metric-learning -scipy==1.15.3 - # via - # -r requirements.in - # crepe - # hmmlearn - # librosa - # pyannote-core - # pyannote-metrics - # scikit-learn - # speechbrain -semver==3.0.4 - # via pyannote-audio -sentencepiece==0.2.0 - # via speechbrain -shellingham==1.5.4 - # via typer -six==1.17.0 - # via - # astunparse - # google-pasta - # keras-preprocessing - # pretty-midi - # python-dateutil - # retrying - # tensorflow -sortedcontainers==2.4.0 - # via pyannote-core -soundfile==0.13.1 - # via - # librosa - # pyannote-audio -speechbrain==1.0.3 - # via pyannote-audio -sqlalchemy==2.0.41 - # via - # alembic - # optuna -submitit==1.5.3 - # via dora-search -sympy==1.14.0 - # via - # onnxruntime - # pyannote-metrics - # torch -tabulate==0.9.0 - # via pyannote-metrics -tensorboard==2.10.1 - # via tensorflow -tensorboard-data-server==0.6.1 - # via tensorboard -tensorboard-plugin-wit==1.8.1 - # via tensorboard -tensorboardx==2.6 - # via pyannote-audio -tensorflow==2.10.0 - # via -r requirements.in -tensorflow-estimator==2.10.0 - # via tensorflow -tensorflow-io-gcs-filesystem==0.31.0 - # via tensorflow -termcolor==3.1.0 - # 
via tensorflow -threadpoolctl==3.6.0 - # via scikit-learn -tokenizers==0.21.1 - # via - # faster-whisper - # transformers -tomli==2.2.1 - # via - # alembic - # black - # pylint - # pytest -tomlkit==0.13.3 - # via pylint -torch==2.7.1 - # via - # asteroid-filterbanks - # demucs - # dora-search - # julius - # lightning - # openunmix - # pyannote-audio - # pytorch-lightning - # pytorch-metric-learning - # speechbrain - # torch-audiomentations - # torch-pitch-shift - # torchaudio - # torchmetrics - # whisperx -torch-audiomentations==0.12.0 - # via pyannote-audio -torch-pitch-shift==1.2.5 - # via torch-audiomentations -torchaudio==2.7.1 - # via - # demucs - # openunmix - # pyannote-audio - # speechbrain - # torch-audiomentations - # torch-pitch-shift - # whisperx -torchmetrics==1.7.2 - # via - # lightning - # pyannote-audio - # pytorch-lightning -tqdm==4.67.1 - # via - # -r requirements.in - # demucs - # faster-whisper - # huggingface-hub - # lightning - # nltk - # openunmix - # optuna - # pyannote-pipeline - # pytorch-lightning - # pytorch-metric-learning - # speechbrain - # transformers -transformers==4.52.4 - # via whisperx -treetable==0.2.5 - # via dora-search -typer==0.16.0 - # via pyannote-database -typing-extensions==4.14.0 - # via - # alembic - # asteroid-filterbanks - # astroid - # black - # exceptiongroup - # huggingface-hub - # lightning - # lightning-utilities - # multidict - # pyannote-core - # pytorch-lightning - # rich - # sqlalchemy - # submitit - # tensorflow - # torch - # typer - # typing-inspect -typing-inspect==0.9.0 - # via dataclasses-json -tzdata==2025.2 - # via pandas -unidecode==1.4.0 - # via -r requirements.in -urllib3==2.4.0 - # via requests -webcolors==24.11.1 - # via music21 -werkzeug==3.1.3 - # via tensorboard -wheel==0.45.1 - # via - # astunparse - # pyhyphen - # tensorboard -whisperx==3.3.1 - # via -r requirements.in -wrapt==1.17.2 - # via tensorflow -yarl==1.20.0 - # via aiohttp -yt-dlp==2025.5.22 - # via -r requirements.in - -# The 
following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements.in b/requirements.in deleted file mode 100644 index 121c1736..00000000 --- a/requirements.in +++ /dev/null @@ -1,89 +0,0 @@ -# requirements.in - -# HOW TO USE: -# Change the tensorflow version for each OS -# pip-compile -o requirements-linux.txt -# pip-compile -o requirements-windows.txt -# Update Package -# pip-compile -P yt_dlp -# Update all packages -# pip-compile -U - -# Pitching -crepe -# machine learning platform (used for crepe) -# This is only compatible with Python 3.10 and older -# 2.10 is only needed for windows GPU support -# WINDOWS -#tensorflow==2.10 -# LINUX -tensorflow - -# Audio to wavefile (used for crepe) -scipy - -# Transcription -whisperx -#git+https://github.com/m-bain/whisperx.git -# Convert numbers to words in multilanguage (Used because of whisper) -num2words -# User imput timeout (used in whisper) -inputimeout - -# langcodes for creates the language name from the language code. 
Used for Ultrastar txt -langcodes -# Version handling (Used in UltraStar txt) -packaging - -# Audio analysis -librosa~=0.9.2 -# Audio manipulation -pydub -# Audio Separation -demucs -# Ffmpeg wrapper -ffmpeg_python - -# Plot engine -matplotlib - -# For getting additional song info like cover, genre, year -musicbrainzngs -# For distance calculation (used in musicbrainz_client) -python_Levenshtein - -# For midi file handling -pretty_midi -# Convert Unicode to ascii (used for midi) -unidecode - -# For text hyphenation -#PyHyphen -pyhyphen - -# Progress bar -tqdm - -# yt -yt_dlp - -# external cleanup tools -## Sort imports -isort -## Code formatter -black -## Code analyser -pylint - -# For Tests -pytest - -# Musical Analysis and Computational Musicology (Used for sheet) -music21 - -# Data structure -dataclasses -dataclasses_json - -# Array support -numpy<2 \ No newline at end of file diff --git a/run_on_linux.sh b/run_on_linux.sh index 5fecb0e7..58609c39 100644 --- a/run_on_linux.sh +++ b/run_on_linux.sh @@ -1,2 +1,22 @@ #!/bin/bash -gnome-terminal -- bash -c ".venv\Scripts\activate; cd src; exec bash" \ No newline at end of file +# Activate virtual environment and run UltraSinger + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +cd "$SCRIPT_DIR" + +# Check if .venv exists +if [ ! -d ".venv" ]; then + echo "Error: Virtual environment not found at .venv" + echo "Please run one of the installation scripts first:" + echo " - install/CPU/linux_cpu.sh (for CPU)" + echo " - install/CUDA/linux_cuda_gpu.sh (for GPU with CUDA)" + exit 1 +fi + +# Activate virtual environment +source .venv/bin/activate + +# Navigate to src and start UltraSinger +cd src +echo "Starting UltraSinger..." 
+python UltraSinger.py diff --git a/run_on_mac.command b/run_on_mac.command index 3c581b7a..91a182d7 100755 --- a/run_on_mac.command +++ b/run_on_mac.command @@ -1,2 +1,21 @@ #!/bin/bash -cd "$(dirname "$0")"; source .venv/bin/activate; cd src; exec bash +# Activate virtual environment and run UltraSinger on macOS + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +cd "$SCRIPT_DIR" + +# Check if .venv exists +if [ ! -d ".venv" ]; then + echo "Error: Virtual environment not found at .venv" + echo "Please run the installation script first:" + echo " - install/CPU/macos_cpu.sh" + exit 1 +fi + +# Activate virtual environment +source .venv/bin/activate + +# Navigate to src and start UltraSinger +cd src +echo "Starting UltraSinger..." +python UltraSinger.py diff --git a/run_on_windows.bat b/run_on_windows.bat index 2b2ed4c6..95b60837 100644 --- a/run_on_windows.bat +++ b/run_on_windows.bat @@ -1,2 +1,5 @@ @echo off -start cmd /k ".venv\Scripts\activate && cd src" \ No newline at end of file +:: Activate the virtual environment and open cmd in the src directory +call .venv\Scripts\activate.bat +cd src +cmd /k diff --git a/src/Settings.py b/src/Settings.py index 66623e11..7b2f66fd 100644 --- a/src/Settings.py +++ b/src/Settings.py @@ -11,7 +11,7 @@ @dataclass class Settings: - APP_VERSION = "0.0.13-dev13" + APP_VERSION = "0.0.13.dev14" CONFIDENCE_THRESHOLD = 0.6 CONFIDENCE_PROMPT_TIMEOUT = 4 @@ -47,16 +47,10 @@ class Settings: whisper_compute_type = None # change to "int8" if low on GPU mem (may reduce accuracy) keep_numbers = False - # Pitch - crepe_model_capacity = "full" # tiny|small|medium|large|full - crepe_step_size = 10 # in miliseconds - # Device pytorch_device = 'cpu' # cpu|cuda - tensorflow_device = 'cpu' # cpu|cuda force_cpu = False force_whisper_cpu = False - force_crepe_cpu = False # MuseScore musescore_path = None @@ -66,7 +60,7 @@ class Settings: # UltraSinger Evaluation Configuration test_songs_input_folder = None - cache_override_path = None + 
cache_override_path = None #"C:\\UltraSinger\\test_output" skip_cache_vocal_separation = False skip_cache_denoise_vocal_audio = False skip_cache_transcription = False diff --git a/src/UltraSinger.py b/src/UltraSinger.py index 7c3a4130..2b52d0ce 100644 --- a/src/UltraSinger.py +++ b/src/UltraSinger.py @@ -20,7 +20,7 @@ from modules.Audio.key_detector import detect_key_from_audio, get_allowed_notes_for_key from modules.Audio.silence_processing import remove_silence_from_transcription_data, mute_no_singing_parts from modules.Audio.separation import DemucsModel -from modules.Audio.convert_audio import convert_audio_to_mono_wav, convert_wav_to_mp3 +from modules.Audio.convert_audio import convert_audio_to_mono_wav, convert_audio_format from modules.Audio.youtube import ( download_from_youtube, ) @@ -43,7 +43,7 @@ from modules.Midi.MidiSegment import MidiSegment from modules.Midi.note_length_calculator import get_thirtytwo_note_second, get_sixteenth_note_second from modules.Pitcher.pitcher import ( - get_pitch_with_crepe_file, + get_pitch_with_file, ) from modules.Pitcher.pitched_data import PitchedData from modules.Speech_Recognition.TranscriptionResult import TranscriptionResult @@ -460,10 +460,12 @@ def InitProcessData(): settings.output_folder_path, audio_file_path, ultrastar_class, + audio_extension, ) = parse_ultrastar_txt(settings.input_file_path, settings.output_folder_path) process_data = from_ultrastar_txt(ultrastar_class) process_data.basename = basename process_data.process_data_paths.audio_output_file_path = audio_file_path + process_data.media_info.audio_extension = audio_extension # todo: ignore transcribe settings.ignore_audio = True @@ -517,15 +519,15 @@ def CreateUltraStarTxt(process_data: ProcessData): # Move instrumental and vocals if settings.create_karaoke and version.parse(settings.format_version.value) < version.parse( FormatVersion.V1_1_0.value): - karaoke_output_path = os.path.join(settings.output_folder_path, process_data.basename + " 
[Karaoke].m4a") - convert_wav_to_mp3(process_data.process_data_paths.instrumental_audio_file_path, karaoke_output_path) + karaoke_output_path = os.path.join(settings.output_folder_path, process_data.basename + " [Karaoke]." + process_data.media_info.audio_extension) + convert_audio_format(process_data.process_data_paths.instrumental_audio_file_path, karaoke_output_path) if version.parse(settings.format_version.value) >= version.parse(FormatVersion.V1_1_0.value): instrumental_output_path = os.path.join(settings.output_folder_path, - process_data.basename + " [Instrumental].m4a") - convert_wav_to_mp3(process_data.process_data_paths.instrumental_audio_file_path, instrumental_output_path) - vocals_output_path = os.path.join(settings.output_folder_path, process_data.basename + " [Vocals].m4a") - convert_wav_to_mp3(process_data.process_data_paths.vocals_audio_file_path, vocals_output_path) + process_data.basename + " [Instrumental]." + process_data.media_info.audio_extension) + convert_audio_format(process_data.process_data_paths.instrumental_audio_file_path, instrumental_output_path) + vocals_output_path = os.path.join(settings.output_folder_path, process_data.basename + " [Vocals]." 
+ process_data.media_info.audio_extension) + convert_audio_format(process_data.process_data_paths.vocals_audio_file_path, vocals_output_path) # Create Ultrastar txt if not settings.ignore_audio: @@ -663,12 +665,14 @@ def infos_from_audio_video_input_file() -> tuple[str, str, str, MediaInfo]: os_helper.copy(settings.input_file_path, video_with_audio_path) # Separate audio and video - ultrastar_audio_input_path, final_video_path = separate_audio_video( + ultrastar_audio_input_path, final_video_path, audio_ext, video_ext = separate_audio_video( video_with_audio_path, basename_without_ext, song_folder_output_path ) else: # Audio file basename_with_ext = f"{basename_without_ext}{extension}" + audio_ext = extension.lstrip('.') + video_ext = None os_helper.copy(settings.input_file_path, song_folder_output_path) os_helper.rename( os.path.join(song_folder_output_path, os.path.basename(settings.input_file_path)), @@ -692,6 +696,8 @@ def infos_from_audio_video_input_file() -> tuple[str, str, str, MediaInfo]: genre=song_info.genres, bpm=real_bpm, cover_url=song_info.cover_url, + audio_extension=audio_ext, + video_extension=video_ext ), ) @@ -700,16 +706,13 @@ def pitch_audio( process_data_paths: ProcessDataPaths) -> PitchedData: """Pitch audio""" - pitching_config = f"crepe_{settings.ignore_audio}_{settings.crepe_model_capacity}_{settings.crepe_step_size}_{settings.tensorflow_device}" + pitching_config = f"swiftf0_{settings.ignore_audio}" pitched_data_path = os.path.join(process_data_paths.cache_folder_path, f"{pitching_config}.json") cache_available = check_file_exists(pitched_data_path) - if settings.skip_cache_transcription or not cache_available: - pitched_data = get_pitch_with_crepe_file( - process_data_paths.processing_audio_path, - settings.crepe_model_capacity, - settings.crepe_step_size, - settings.tensorflow_device, + if settings.skip_cache_pitch_detection or not cache_available: + pitched_data = get_pitch_with_file( + process_data_paths.processing_audio_path ) 
pitched_data_json = pitched_data.to_json() @@ -737,7 +740,7 @@ def main(argv: list[str]) -> None: def check_requirements() -> None: if not settings.force_cpu: - settings.tensorflow_device, settings.pytorch_device = check_gpu_support() + settings.pytorch_device = check_gpu_support() print(f"{ULTRASINGER_HEAD} ----------------------") if not is_ffmpeg_available(settings.user_ffmpeg_path): diff --git a/src/modules/Audio/convert_audio.py b/src/modules/Audio/convert_audio.py index bb188786..00fcd8f9 100644 --- a/src/modules/Audio/convert_audio.py +++ b/src/modules/Audio/convert_audio.py @@ -1,6 +1,7 @@ """Convert audio to other formats""" -from pydub import AudioSegment +import subprocess +import os import librosa import soundfile as sf @@ -14,9 +15,27 @@ def convert_audio_to_mono_wav(input_file_path: str, output_file_path: str) -> No sf.write(output_file_path, y, sr) -def convert_wav_to_mp3(input_file_path: str, output_file_path: str) -> None: - """Convert wav to mp3""" - print(f"{ULTRASINGER_HEAD} Converting wav to mp3. -> {output_file_path}") +def convert_audio_format(input_file_path: str, output_file_path: str) -> None: + """Convert audio to the format specified by the output file extension using ffmpeg""" + output_format = os.path.splitext(output_file_path)[1].lstrip('.') - sound = AudioSegment.from_wav(input_file_path) - sound.export(output_file_path, format="mp3") + print(f"{ULTRASINGER_HEAD} Converting audio to {output_format}. -> {output_file_path}") + # todo: makes it sense to reencode here? 
Its only used for Instrumental and Vocal + # Use ffmpeg for audio conversion + # -i: input file + # -y: overwrite output file without asking + # -loglevel error: only show errors + # -q:a 0: best quality for VBR formats (mp3, ogg) + # -codec:a copy would be fastest but only works if formats match + cmd = [ + "ffmpeg", + "-i", input_file_path, + "-y", + "-loglevel", "error", + "-q:a", "0", + output_file_path + ] + + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + raise RuntimeError(f"FFmpeg audio conversion failed: {result.stderr}") diff --git a/src/modules/Audio/key_detector.py b/src/modules/Audio/key_detector.py index 26326585..bc5b008d 100644 --- a/src/modules/Audio/key_detector.py +++ b/src/modules/Audio/key_detector.py @@ -1,10 +1,8 @@ """Key detection and pitch quantization to musical scale""" -import copy import librosa import numpy as np -from modules.Midi.MidiSegment import MidiSegment from modules.console_colors import ( ULTRASINGER_HEAD, blue_highlighted) diff --git a/src/modules/Audio/youtube.py b/src/modules/Audio/youtube.py index 8e919806..cd3dfafb 100644 --- a/src/modules/Audio/youtube.py +++ b/src/modules/Audio/youtube.py @@ -104,7 +104,7 @@ def download_from_youtube(input_url: str, output_folder_path: str, cookiefile: s video_with_audio_path = os.path.join(song_output, f"{basename_without_ext}.{video_ext}") # Separate audio and video - audio_file_path, final_video_path = separate_audio_video( + audio_file_path, final_video_path, audio_ext, video_ext = separate_audio_video( video_with_audio_path, basename_without_ext, song_output ) @@ -129,5 +129,7 @@ def download_from_youtube(input_url: str, output_folder_path: str, cookiefile: s bpm=real_bpm, cover_url=cover_url, video_url=input_url, + audio_extension=audio_ext, + video_extension=video_ext ), ) diff --git a/src/modules/DeviceDetection/device_detection.py b/src/modules/DeviceDetection/device_detection.py index 2466b6ad..27022819 100644 --- 
a/src/modules/DeviceDetection/device_detection.py +++ b/src/modules/DeviceDetection/device_detection.py @@ -1,38 +1,19 @@ """Device detection module.""" import torch -import os -import tensorflow as tf from modules.console_colors import ULTRASINGER_HEAD, red_highlighted, blue_highlighted -tensorflow_gpu_supported = False pytorch_gpu_supported = False -def check_gpu_support() -> tuple[bool, bool]: - """Check worker device (e.g cuda or cpu) supported by tensorflow and pytorch""" +def check_gpu_support() -> str: + """Check worker device (e.g cuda or cpu) supported by pytorch""" print(f"{ULTRASINGER_HEAD} Checking GPU support.") pytorch_gpu_supported = __check_pytorch_support() - tensorflow_gpu_supported = __check_tensorflow_support() - return 'cuda' if tensorflow_gpu_supported else 'cpu', 'cuda' if pytorch_gpu_supported else 'cpu' - - -def __check_tensorflow_support(): - tensorflow_gpu_supported = False - gpus = tf.config.list_physical_devices('GPU') - if gpus: - tensorflow_gpu_supported = True - print(f"{ULTRASINGER_HEAD} {blue_highlighted('tensorflow')} - using {red_highlighted('cuda')} gpu.") - else: - print( - f"{ULTRASINGER_HEAD} {blue_highlighted('tensorflow')} - there are no {red_highlighted('cuda')} devices available -> Using {red_highlighted('cpu')}.") - if os.name == 'nt': - print( - f"{ULTRASINGER_HEAD} {blue_highlighted('tensorflow')} - versions above 2.10 dropped GPU support for Windows, refer to the readme for possible solutions.") - return tensorflow_gpu_supported + return 'cuda' if pytorch_gpu_supported else 'cpu' def __check_pytorch_support(): diff --git a/src/modules/Midi/midi_creator.py b/src/modules/Midi/midi_creator.py index a89f1ca4..97446e19 100644 --- a/src/modules/Midi/midi_creator.py +++ b/src/modules/Midi/midi_creator.py @@ -17,7 +17,7 @@ from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue from modules.Pitcher.pitched_data import PitchedData from modules.Pitcher.pitched_data_helper import get_frequencies_with_high_confidence - 
+from modules.Audio.key_detector import quantize_note_to_key def create_midi_instrument(midi_segments: list[MidiSegment]) -> object: """Converts an Ultrastar data to a midi instrument""" @@ -145,7 +145,6 @@ def create_midi_note_from_pitched_data(start_time: float, end_time: float, pitch note = most_frequent(notes)[0][0] if allowed_notes is not None: - from modules.Audio.key_detector import quantize_note_to_key note = quantize_note_to_key(note, allowed_notes) return MidiSegment(note, start_time, end_time, word) diff --git a/src/modules/Pitcher/pitcher.py b/src/modules/Pitcher/pitcher.py index de6fc772..697dc55f 100644 --- a/src/modules/Pitcher/pitcher.py +++ b/src/modules/Pitcher/pitcher.py @@ -1,41 +1,74 @@ """Pitcher module""" -import os +import numpy as np -import crepe from scipy.io import wavfile +from swift_f0 import SwiftF0 -from modules.console_colors import ULTRASINGER_HEAD, blue_highlighted, red_highlighted -from modules.Midi.midi_creator import convert_frequencies_to_notes, most_frequent +from modules.console_colors import ULTRASINGER_HEAD, blue_highlighted from modules.Pitcher.pitched_data import PitchedData -from modules.Pitcher.pitched_data_helper import get_frequencies_with_high_confidence +_swift_f0_detector = None -def get_pitch_with_crepe_file( - filename: str, model_capacity: str, step_size: int = 10, device: str = "cpu" +def _get_detector(): + """Lazy initialize SwiftF0 detector""" + global _swift_f0_detector + if _swift_f0_detector is None: + # Initialize for general music/speech (wide frequency range) fmin=46.875, fmax=2093.75 + # For speech only: fmin=65, fmax=400 + _swift_f0_detector = SwiftF0(fmin=46.875, fmax=2093.75, confidence_threshold=0.9) + return _swift_f0_detector + + +def get_pitch_with_file( + filename: str ) -> PitchedData: - """Pitch with crepe""" + """Pitch detection using SwiftF0""" print( - f"{ULTRASINGER_HEAD} Pitching with {blue_highlighted('crepe')} and model {blue_highlighted(model_capacity)} and {red_highlighted(device)} 
as worker" + f"{ULTRASINGER_HEAD} Pitching with {blue_highlighted('SwiftF0')}" ) sample_rate, audio = wavfile.read(filename) - return get_pitch_with_crepe(audio, sample_rate, model_capacity, step_size) - - -def get_pitch_with_crepe( - audio, sample_rate: int, model_capacity: str, step_size: int = 10 + # Convert stereo to mono if needed + if len(audio.shape) > 1: + audio = np.mean(audio, axis=1) + + # Normalize audio to float32 based on dtype + if audio.dtype == np.uint8: + # uint8: range [0, 255] -> subtract 128 and divide by 128 + audio = (audio.astype(np.float32) - 128.0) / 128.0 + elif audio.dtype in [np.int16, np.int32, np.int64]: + # Signed integers: use iinfo to get max value and normalize + dtype_info = np.iinfo(audio.dtype) + max_val = max(abs(dtype_info.min), abs(dtype_info.max)) + audio = audio.astype(np.float32) / float(max_val) + elif audio.dtype == np.float64: + # float64: cast to float32 + audio = audio.astype(np.float32) + elif audio.dtype != np.float32: + # Fallback for other types: assume int16 range + audio = audio.astype(np.float32) / 32768.0 + + return get_pitch_with_swift_f0(audio, sample_rate) + + +def get_pitch_with_swift_f0( + audio: np.ndarray, sample_rate: int ) -> PitchedData: - """Pitch with crepe""" + """Pitch detection using SwiftF0 - # Info: The model is trained on 16 kHz audio, so if the input audio has a different sample rate, it will be first resampled to 16 kHz using resampy inside crepe. + SwiftF0 processes audio at 16kHz with 256-sample hop size internally. + Returns frames at approximately 62.5 ms intervals. 
+ """ + detector = _get_detector() - times, frequencies, confidence, activation = crepe.predict( - audio, sample_rate, model_capacity, step_size=step_size, viterbi=True - ) + # Detect pitch + result = detector.detect_from_array(audio, sample_rate) - # convert to native float for serialization - confidence = [float(x) for x in confidence] + # Convert to PitchedData format + times = [float(t) for t in result.timestamps] + frequencies = [float(f) for f in result.pitch_hz] + confidence = [float(c) for c in result.confidence] return PitchedData(times, frequencies, confidence) @@ -53,39 +86,6 @@ def get_pitched_data_with_high_confidence( return new_pitched_data -# Todo: Unused -def pitch_each_chunk_with_crepe(directory: str, - crepe_model_capacity: str, - crepe_step_size: int, - tensorflow_device: str) -> list[str]: - """Pitch each chunk with crepe and return midi notes""" - print(f"{ULTRASINGER_HEAD} Pitching each chunk with {blue_highlighted('crepe')}") - - midi_notes = [] - for filename in sorted( - [f for f in os.listdir(directory) if f.endswith(".wav")], - key=lambda x: int(x.split("_")[1]), - ): - filepath = os.path.join(directory, filename) - # todo: stepsize = duration? then when shorter than "it" it should take the duration. Otherwise there a more notes - pitched_data = get_pitch_with_crepe_file( - filepath, - crepe_model_capacity, - crepe_step_size, - tensorflow_device, - ) - conf_f = get_frequencies_with_high_confidence( - pitched_data.frequencies, pitched_data.confidence - ) - - notes = convert_frequencies_to_notes(conf_f) - note = most_frequent(notes)[0][0] - - midi_notes.append(note) - # todo: Progress? 
- # print(filename + " f: " + str(mean)) - - return midi_notes class Pitcher: """Docstring""" diff --git a/src/modules/ProcessData.py b/src/modules/ProcessData.py index a3350fd6..4be775a9 100644 --- a/src/modules/ProcessData.py +++ b/src/modules/ProcessData.py @@ -26,11 +26,13 @@ class MediaInfo: cover_url: Optional[str] = None video_url: Optional[str] = None music_key: Optional[str] = None # (e.g., "C major", "A minor") + video_extension: Optional[str] = None # (e.g., "mp4", "mkv") + audio_extension: Optional[str] = None # (e.g., "mp3", "m4a") @dataclass class ProcessData: """Data for processing""" - process_data_paths: ProcessDataPaths = ProcessDataPaths() + process_data_paths: ProcessDataPaths = field(default_factory=ProcessDataPaths) basename: Optional[str] = None media_info: Optional[MediaInfo] = None transcribed_data: Optional[List[TranscribedData]] = field(default_factory=list) diff --git a/src/modules/Speech_Recognition/Whisper.py b/src/modules/Speech_Recognition/Whisper.py index 7896134f..c0cd6e49 100644 --- a/src/modules/Speech_Recognition/Whisper.py +++ b/src/modules/Speech_Recognition/Whisper.py @@ -1,7 +1,26 @@ """Whisper Speech Recognition Module""" +import os import inspect import textwrap + +# Set environment variable to handle cuDNN loading issues +# CUDA_MODULE_LOADING=LAZY allows PyTorch to continue even if some cuDNN modules are not found +#os.environ['CUDA_MODULE_LOADING'] = 'LAZY' + import torch + +# Fix for PyTorch 2.6+ compatibility with omegaconf and old models +# MUST be done before importing whisperx or any other modules that use torch.load +# Monkey-patch torch.load to use weights_only=False for compatibility with older models +# This is necessary because PyTorch 2.6+ changed the default to weights_only=True +# but WhisperX/Pyannote models were saved with pickle and require weights_only=False +_original_torch_load = torch.load +def _patched_torch_load(*args, **kwargs): + # Force weights_only=False regardless of what was passed + 
kwargs['weights_only'] = False + return _original_torch_load(*args, **kwargs) +torch.load = _patched_torch_load + import whisperx from enum import Enum from torch.cuda import OutOfMemoryError @@ -80,68 +99,15 @@ def transcribe_with_whisper( keep_numbers: bool = False, ) -> TranscriptionResult: """Transcribe with whisper""" - # Info: Monkey Patch FasterWhisperPipeline.detect_language to include error handling for low confidence - src = textwrap.dedent(inspect.getsource(whisperx.asr.FasterWhisperPipeline.detect_language)) - # Replace the relevant part of the method - start_token = "if audio.shape[0] < N_SAMPLES:" - end_token = "return language" - replacement = """\ - #Added imports - from modules.console_colors import ULTRASINGER_HEAD, blue_highlighted, red_highlighted - from Settings import Settings - from inputimeout import inputimeout, TimeoutOccurred - #End Import addition - if audio.shape[0] < N_SAMPLES: - print("Warning: audio is shorter than 30s, language detection may be inaccurate.") - model_n_mels = self.model.feat_kwargs.get("feature_size") - segment = log_mel_spectrogram(audio[: N_SAMPLES], - n_mels=model_n_mels if model_n_mels is not None else 80, - padding=0 if audio.shape[0] >= N_SAMPLES else N_SAMPLES - audio.shape[0]) - encoder_output = self.model.encode(segment) - results = self.model.model.detect_language(encoder_output) - language_token, language_probability = results[0][0] - language = language_token[2:-2] - print(f"Detected language: {language} ({language_probability:.2f}) in first 30s of audio...") - #Added handling for low detection probability - if language_probability < Settings.CONFIDENCE_THRESHOLD: - print(f"{ULTRASINGER_HEAD} {red_highlighted('Warning:')} Language detection probability for detected language {language} is below {Settings.CONFIDENCE_THRESHOLD}, results may be inaccurate.") - print(f"{ULTRASINGER_HEAD} Override the language below or re-run with parameter {blue_highlighted('--language xx')} to specify the song language...") - 
try: - response = inputimeout( - prompt=f"{ULTRASINGER_HEAD} Do you want to continue with {language} (default) or override with another language (y)? (y/n): ", - timeout=Settings.CONFIDENCE_PROMPT_TIMEOUT - ).strip().lower() - except TimeoutOccurred: - import locale - print(f"{ULTRASINGER_HEAD} No user input received in {Settings.CONFIDENCE_PROMPT_TIMEOUT} seconds. Attempting automatic override with system locale.") - print(f"{ULTRASINGER_HEAD} Trying to get language from default locale...") - current_locale = locale.getlocale() - if current_locale[0]: - language_code = current_locale[0][:2].strip().lower() - print(f"{ULTRASINGER_HEAD} Found language code: {language_code} in locale. Setting language to {blue_highlighted(language_code)}...") - language = language_code - else: - print(f"{ULTRASINGER_HEAD} No locale is set.") - response = 'n' - language_response = response == 'y' - if language_response: - language = input(f"{ULTRASINGER_HEAD} Please enter the language code for the language you want to use (e.g. 'en', 'de', 'es', etc.): ").strip().lower() - #End addition - """ - new_src = replace_code_lines(src, start_token, end_token, replacement) - # Compile it and execute it in the target module's namespace - exec(compile(new_src, "", "exec"), whisperx.asr.__dict__) - whisperx.asr.FasterWhisperPipeline.detect_language = whisperx.asr.detect_language - #End Monkey Patch - # Info: Regardless of the audio sampling rate used in the original audio file, whisper resample the audio signal to 16kHz (via ffmpeg). So the standard input from (44.1 or 48 kHz) should work. - print( f"{ULTRASINGER_HEAD} Loading {blue_highlighted('whisper')} with model {blue_highlighted(model.value)} and {red_highlighted(device)} as worker" ) if alignment_model is not None: print(f"{ULTRASINGER_HEAD} using alignment model {blue_highlighted(alignment_model)}") + # Fixme: Why it is not none for cpu? 
+ #compute_type = "int8" if compute_type is None: compute_type = "float16" if device == "cuda" else "int8" @@ -196,6 +162,9 @@ def transcribe_with_whisper( return TranscriptionResult(transcribed_data, detected_language) except ValueError as value_error: + # Restore original torch.load in case of error + torch.load = _original_torch_load + if ( "Requested float16 compute type, but the target device or backend do not support efficient float16 computation." in str(value_error.args[0]) @@ -207,14 +176,24 @@ def transcribe_with_whisper( raise value_error except OutOfMemoryError as oom_exception: + # Restore original torch.load in case of error + torch.load = _original_torch_load + print(oom_exception) print(MEMORY_ERROR_MESSAGE) raise oom_exception except Exception as exception: + # Restore original torch.load in case of error + torch.load = _original_torch_load + if "CUDA failed with error out of memory" in str(exception.args[0]): print(exception) print(MEMORY_ERROR_MESSAGE) raise exception + finally: + # Restore original torch.load after models are loaded + # This ensures other modules (like pitch detection) are not affected by the monkey-patch + torch.load = _original_torch_load def convert_to_transcribed_data(result_aligned): diff --git a/src/modules/Ultrastar/coverter/ultrastar_txt_converter.py b/src/modules/Ultrastar/coverter/ultrastar_txt_converter.py index ff94f4b8..fc9993bc 100644 --- a/src/modules/Ultrastar/coverter/ultrastar_txt_converter.py +++ b/src/modules/Ultrastar/coverter/ultrastar_txt_converter.py @@ -66,11 +66,14 @@ def create_ultrastar_txt_from_automation( ultrastar_txt = UltrastarTxtValue() ultrastar_txt.version = format_version.value - ultrastar_txt.mp3 = basename + ".m4a" - ultrastar_txt.audio = basename + ".m4a" - ultrastar_txt.vocals = basename + " [Vocals].m4a" - ultrastar_txt.instrumental = basename + " [Instrumental].m4a" - ultrastar_txt.video = basename + ".mp4" + if media_info.audio_extension is None: + raise Exception("Missing Audio 
extension. It is required to create Ultrastar txt") + ultrastar_txt.mp3 = basename + "." + media_info.audio_extension + ultrastar_txt.audio = basename + "." + media_info.audio_extension + ultrastar_txt.vocals = basename + " [Vocals]." + media_info.audio_extension + ultrastar_txt.instrumental = basename + " [Instrumental]." + media_info.audio_extension + if media_info.video_extension is not None: + ultrastar_txt.video = basename + "." + media_info.video_extension ultrastar_txt.language = media_info.language cover = basename + " [CO].jpg" ultrastar_txt.cover = ( @@ -91,7 +94,8 @@ def create_ultrastar_txt_from_automation( if media_info.cover_url is not None: ultrastar_txt.coverUrl = media_info.cover_url if media_info.music_key is not None: - ultrastar_txt.tags = media_info.music_key + # todo: as list add here? + ultrastar_txt.tags = f"key: {media_info.music_key}" ultrastar_file_output_path = os.path.join(song_folder_output_path, basename + ".txt") create_ultrastar_txt( @@ -103,7 +107,7 @@ def create_ultrastar_txt_from_automation( if create_karaoke and version.parse(format_version.value) < version.parse(FormatVersion.V1_1_0.value): title = basename + " [Karaoke]" ultrastar_txt.title = title - ultrastar_txt.mp3 = title + ".m4a" + ultrastar_txt.mp3 = title + "." 
+ media_info.audio_extension karaoke_output_path = os.path.join(song_folder_output_path, title) karaoke_txt_output_path = karaoke_output_path + ".txt" create_ultrastar_txt( diff --git a/src/modules/Ultrastar/ultrastar_parser.py b/src/modules/Ultrastar/ultrastar_parser.py index f3024dd5..55d5645c 100644 --- a/src/modules/Ultrastar/ultrastar_parser.py +++ b/src/modules/Ultrastar/ultrastar_parser.py @@ -33,35 +33,33 @@ def parse(input_file: str) -> UltrastarTxtValue: for line in txt: count += 1 if line.startswith("#"): - if line.startswith(f"#{UltrastarTxtTag.ARTIST}"): + if line.startswith(f"#{UltrastarTxtTag.ARTIST.value}"): ultrastar_class.artist = line.split(":")[1].replace("\n", "") - elif line.startswith(f"#{UltrastarTxtTag.TITLE}"): + elif line.startswith(f"#{UltrastarTxtTag.TITLE.value}"): ultrastar_class.title = line.split(":")[1].replace("\n", "") - elif line.startswith(f"#{UltrastarTxtTag.MP3}"): + elif line.startswith(f"#{UltrastarTxtTag.MP3.value}"): ultrastar_class.mp3 = line.split(":")[1].replace("\n", "") - elif line.startswith(f"#{UltrastarTxtTag.AUDIO}"): + elif line.startswith(f"#{UltrastarTxtTag.AUDIO.value}"): ultrastar_class.audio = line.split(":")[1].replace("\n", "") - elif line.startswith(f"#{UltrastarTxtTag.VIDEO}"): + elif line.startswith(f"#{UltrastarTxtTag.VIDEO.value}"): ultrastar_class.video = line.split(":")[1].replace("\n", "") - elif line.startswith(f"#{UltrastarTxtTag.GAP}"): + elif line.startswith(f"#{UltrastarTxtTag.GAP.value}"): ultrastar_class.gap = line.split(":")[1].replace("\n", "") - elif line.startswith(f"#{UltrastarTxtTag.BPM}"): + elif line.startswith(f"#{UltrastarTxtTag.BPM.value}"): ultrastar_class.bpm = line.split(":")[1].replace("\n", "") - elif line.startswith(f"#{UltrastarTxtTag.VIDEO}"): - ultrastar_class.video = line.split(":")[1].replace("\n", "") - elif line.startswith(f"#{UltrastarTxtTag.VIDEOGAP}"): + elif line.startswith(f"#{UltrastarTxtTag.VIDEOGAP.value}"): ultrastar_class.videoGap = 
line.split(":")[1].replace("\n", "") - elif line.startswith(f"#{UltrastarTxtTag.COVER}"): + elif line.startswith(f"#{UltrastarTxtTag.COVER.value}"): ultrastar_class.cover = line.split(":")[1].replace("\n", "") - elif line.startswith(f"#{UltrastarTxtTag.BACKGROUND}"): + elif line.startswith(f"#{UltrastarTxtTag.BACKGROUND.value}"): ultrastar_class.background = line.split(":")[1].replace("\n", "") elif line.startswith( ( - f"{UltrastarTxtNoteTypeTag.FREESTYLE} ", - f"{UltrastarTxtNoteTypeTag.NORMAL} ", - f"{UltrastarTxtNoteTypeTag.GOLDEN} ", - f"{UltrastarTxtNoteTypeTag.RAP} ", - f"{UltrastarTxtNoteTypeTag.RAP_GOLDEN} ", + f"{UltrastarTxtNoteTypeTag.FREESTYLE.value} ", + f"{UltrastarTxtNoteTypeTag.NORMAL.value} ", + f"{UltrastarTxtNoteTypeTag.GOLDEN.value} ", + f"{UltrastarTxtNoteTypeTag.RAP.value} ", + f"{UltrastarTxtNoteTypeTag.RAP_GOLDEN.value} ", ) ): parts = line.split() @@ -86,7 +84,7 @@ def parse(input_file: str) -> UltrastarTxtValue: return ultrastar_class -def parse_ultrastar_txt(input_file_path: str, output_folder_path: str) -> tuple[str, str, str, UltrastarTxtValue]: +def parse_ultrastar_txt(input_file_path: str, output_folder_path: str) -> tuple[str, str, str, UltrastarTxtValue, str]: """Parse Ultrastar txt""" ultrastar_class = parse(input_file_path) @@ -100,6 +98,8 @@ def parse_ultrastar_txt(input_file_path: str, output_folder_path: str) -> tuple[ f"an audio file." 
) exit(1) + _, audio_ext_with_dot = os.path.splitext(ultrastar_mp3_name) + audio_ext = audio_ext_with_dot.lstrip('.') song_output = os.path.join( output_folder_path, @@ -118,4 +118,5 @@ def parse_ultrastar_txt(input_file_path: str, output_folder_path: str) -> tuple[ song_output, str(audio_file_path), ultrastar_class, + audio_ext ) diff --git a/src/modules/Ultrastar/ultrastar_score_calculator.py b/src/modules/Ultrastar/ultrastar_score_calculator.py index bdf69b88..dfe5e5ef 100644 --- a/src/modules/Ultrastar/ultrastar_score_calculator.py +++ b/src/modules/Ultrastar/ultrastar_score_calculator.py @@ -43,13 +43,13 @@ class Points: def add_point(note_type: str, points: Points) -> Points: """Add calculated points to the points object.""" - if note_type == UltrastarTxtNoteTypeTag.NORMAL: + if note_type == UltrastarTxtNoteTypeTag.NORMAL.value: points.notes += 1 - elif note_type == UltrastarTxtNoteTypeTag.GOLDEN: + elif note_type == UltrastarTxtNoteTypeTag.GOLDEN.value: points.golden_notes += 2 - elif note_type == UltrastarTxtNoteTypeTag.RAP: + elif note_type == UltrastarTxtNoteTypeTag.RAP.value: points.rap += 1 - elif note_type == UltrastarTxtNoteTypeTag.RAP_GOLDEN: + elif note_type == UltrastarTxtNoteTypeTag.RAP_GOLDEN.value: points.golden_rap += 2 return points @@ -75,6 +75,9 @@ def get_score(points: Points) -> Score: if points.line_bonus == 0 else MAX_SONG_SCORE - MAX_SONG_LINE_BONUS ) + if(points.parts == 0): + print(f"{ULTRASINGER_HEAD} No parts found, returning 0 score") + return score score.notes = round( score.max_score * (points.notes + points.rap) / points.parts ) @@ -100,6 +103,9 @@ def calculate_score(pitched_data: PitchedData, ultrastar_class: UltrastarTxtValu simple_points = Points() accurate_points = Points() + if(len(ultrastar_class.UltrastarNoteLines) == 0): + print(f"{ULTRASINGER_HEAD} No note lines found in Ultrastar txt, returning 0 points") + return get_score(simple_points), get_score(accurate_points) reachable_line_bonus_per_word = MAX_SONG_LINE_BONUS 
/ len(ultrastar_class.UltrastarNoteLines) step_size = 0.09 # Todo: Whats is the step size of the game? Its not 1/bps -> one beat in seconds s = 60/bpm diff --git a/src/modules/Ultrastar/ultrastar_writer.py b/src/modules/Ultrastar/ultrastar_writer.py index 81591184..1ef5d1ee 100644 --- a/src/modules/Ultrastar/ultrastar_writer.py +++ b/src/modules/Ultrastar/ultrastar_writer.py @@ -53,42 +53,43 @@ def create_ultrastar_txt( gap = midi_segments[0].start if version.parse(ultrastar_class.version) >= version.parse("1.0.0"): - file.write(f"#{UltrastarTxtTag.VERSION}:{ultrastar_class.version}\n"), - file.write(f"#{UltrastarTxtTag.ARTIST}:{ultrastar_class.artist}\n") - file.write(f"#{UltrastarTxtTag.TITLE}:{ultrastar_class.title}\n") + file.write(f"#{UltrastarTxtTag.VERSION.value}:{ultrastar_class.version}\n") + file.write(f"#{UltrastarTxtTag.ARTIST.value}:{ultrastar_class.artist}\n") + file.write(f"#{UltrastarTxtTag.TITLE.value}:{ultrastar_class.title}\n") if ultrastar_class.year is not None: - file.write(f"#{UltrastarTxtTag.YEAR}:{ultrastar_class.year}\n") + file.write(f"#{UltrastarTxtTag.YEAR.value}:{ultrastar_class.year}\n") if ultrastar_class.language is not None: - file.write(f"#{UltrastarTxtTag.LANGUAGE}:{get_language_name(ultrastar_class.language)}\n") + file.write(f"#{UltrastarTxtTag.LANGUAGE.value}:{get_language_name(ultrastar_class.language)}\n") if ultrastar_class.genre: - file.write(f"#{UltrastarTxtTag.GENRE}:{ultrastar_class.genre}\n") + file.write(f"#{UltrastarTxtTag.GENRE.value}:{ultrastar_class.genre}\n") if ultrastar_class.cover is not None: - file.write(f"#{UltrastarTxtTag.COVER}:{ultrastar_class.cover}\n") + file.write(f"#{UltrastarTxtTag.COVER.value}:{ultrastar_class.cover}\n") if version.parse(ultrastar_class.version) >= version.parse("1.2.0"): if ultrastar_class.coverUrl is not None: - file.write(f"#{UltrastarTxtTag.COVERURL}:{ultrastar_class.coverUrl}\n") + file.write(f"#{UltrastarTxtTag.COVERURL.value}:{ultrastar_class.coverUrl}\n") if 
ultrastar_class.background is not None: - file.write(f"#{UltrastarTxtTag.BACKGROUND}:{ultrastar_class.background}\n") - file.write(f"#{UltrastarTxtTag.MP3}:{ultrastar_class.mp3}\n") + file.write(f"#{UltrastarTxtTag.BACKGROUND.value}:{ultrastar_class.background}\n") + file.write(f"#{UltrastarTxtTag.MP3.value}:{ultrastar_class.mp3}\n") if version.parse(ultrastar_class.version) >= version.parse("1.1.0"): - file.write(f"#{UltrastarTxtTag.AUDIO}:{ultrastar_class.audio}\n") + file.write(f"#{UltrastarTxtTag.AUDIO.value}:{ultrastar_class.audio}\n") if ultrastar_class.vocals is not None: - file.write(f"#{UltrastarTxtTag.VOCALS}:{ultrastar_class.vocals}\n") + file.write(f"#{UltrastarTxtTag.VOCALS.value}:{ultrastar_class.vocals}\n") if ultrastar_class.instrumental is not None: - file.write(f"#{UltrastarTxtTag.INSTRUMENTAL}:{ultrastar_class.instrumental}\n") - if ultrastar_class.tags is not None: - file.write(f"#{UltrastarTxtTag.TAGS}:{ultrastar_class.tags}\n") + file.write(f"#{UltrastarTxtTag.INSTRUMENTAL.value}:{ultrastar_class.instrumental}\n") if ultrastar_class.video is not None: - file.write(f"#{UltrastarTxtTag.VIDEO}:{ultrastar_class.video}\n") + file.write(f"#{UltrastarTxtTag.VIDEO.value}:{ultrastar_class.video}\n") if ultrastar_class.videoGap is not None: - file.write(f"#{UltrastarTxtTag.VIDEOGAP}:{ultrastar_class.videoGap}\n") + file.write(f"#{UltrastarTxtTag.VIDEOGAP.value}:{ultrastar_class.videoGap}\n") if version.parse(ultrastar_class.version) >= version.parse("1.2.0"): if ultrastar_class.videoUrl is not None: - file.write(f"#{UltrastarTxtTag.VIDEOURL}:{ultrastar_class.videoUrl}\n") - file.write(f"#{UltrastarTxtTag.BPM}:{round(ultrastar_bpm, 2)}\n") # not the real BPM! 
- file.write(f"#{UltrastarTxtTag.GAP}:{int(gap * 1000)}\n") - file.write(f"#{UltrastarTxtTag.CREATOR}:{ultrastar_class.creator}\n") - file.write(f"#{UltrastarTxtTag.COMMENT}:{ultrastar_class.comment}\n") + file.write(f"#{UltrastarTxtTag.VIDEOURL.value}:{ultrastar_class.videoUrl}\n") + file.write(f"#{UltrastarTxtTag.BPM.value}:{round(ultrastar_bpm, 2)}\n") # not the real BPM! + file.write(f"#{UltrastarTxtTag.GAP.value}:{int(gap * 1000)}\n") + if version.parse(ultrastar_class.version) >= version.parse("1.1.0"): + if ultrastar_class.tags is not None: + file.write(f"#{UltrastarTxtTag.TAGS.value}:{ultrastar_class.tags}\n") + file.write(f"#{UltrastarTxtTag.CREATOR.value}:{ultrastar_class.creator}\n") + file.write(f"#{UltrastarTxtTag.COMMENT.value}:{ultrastar_class.comment}\n") # Write the singing part previous_end_beat = 0 @@ -118,7 +119,7 @@ def create_ultrastar_txt( # 'n2' duration at real beat # 'n3' pitch where 0 == C4 # 'w' lyric - line = f"{UltrastarTxtNoteTypeTag.NORMAL} " \ + line = f"{UltrastarTxtNoteTypeTag.NORMAL.value} " \ f"{str(start_beat)} " \ f"{str(duration)} " \ f"{str(convert_midi_note_to_ultrastar_note(midi_segment))} " \ @@ -140,11 +141,11 @@ def create_ultrastar_txt( second_to_beat(midi_segment.end - gap, real_bpm) * multiplication ) - linebreak = f"{UltrastarTxtTag.LINEBREAK} " \ + linebreak = f"{UltrastarTxtTag.LINEBREAK.value} " \ f"{str(round(show_next))}\n" file.write(linebreak) separated_word_silence = [] - file.write(f"{UltrastarTxtTag.FILE_END}") + file.write(f"{UltrastarTxtTag.FILE_END.value}") def deviation(silence_parts): @@ -191,7 +192,7 @@ def create_repitched_txt_from_ultrastar_data( # todo: just add '_repitched' to input_file with open(output_repitched_ultrastar, "w", encoding=FILE_ENCODING) as file: for line in txt: - if line.startswith(f"{UltrastarTxtNoteTypeTag.NORMAL} "): + if line.startswith(f"{UltrastarTxtNoteTypeTag.NORMAL.value} "): parts = re.findall(r"\S+|\s+", line) # between are whitespaces # [0] : @@ -214,21 +215,21 @@ 
def add_score_to_ultrastar_txt(ultrastar_file_output: str, score: Score) -> None text = text.split("\n") for i, line in enumerate(text): - if line.startswith(f"#{UltrastarTxtTag.COMMENT}:"): + if line.startswith(f"#{UltrastarTxtTag.COMMENT.value}:"): text[ i ] = f"{line} | Score: total: {score.score}, notes: {score.notes} line: {score.line_bonus}, golden: {score.golden}" break if line.startswith(( - f"{UltrastarTxtNoteTypeTag.FREESTYLE} ", - f"{UltrastarTxtNoteTypeTag.NORMAL} ", - f"{UltrastarTxtNoteTypeTag.GOLDEN} ", - f"{UltrastarTxtNoteTypeTag.RAP} ", - f"{UltrastarTxtNoteTypeTag.RAP_GOLDEN} ")): + f"{UltrastarTxtNoteTypeTag.FREESTYLE.value} ", + f"{UltrastarTxtNoteTypeTag.NORMAL.value} ", + f"{UltrastarTxtNoteTypeTag.GOLDEN.value} ", + f"{UltrastarTxtNoteTypeTag.RAP.value} ", + f"{UltrastarTxtNoteTypeTag.RAP_GOLDEN.value} ")): text.insert( i, - f"#{UltrastarTxtTag.COMMENT}: UltraSinger [GitHub] | Score: total: {score.score}, notes: {score.notes} line: {score.line_bonus}, golden: {score.golden}", + f"#{UltrastarTxtTag.COMMENT.value}: UltraSinger [GitHub] | Score: total: {score.score}, notes: {score.notes} line: {score.line_bonus}, golden: {score.golden}", ) break diff --git a/src/modules/ffmpeg_helper.py b/src/modules/ffmpeg_helper.py index 420bcfef..3ef9886e 100644 --- a/src/modules/ffmpeg_helper.py +++ b/src/modules/ffmpeg_helper.py @@ -91,30 +91,78 @@ def is_video_file(file_path: str) -> bool: return False -def separate_audio_video(video_with_audio_path: str, basename_without_ext: str, output_folder: str) -> tuple[str, str]: +def get_audio_codec_and_extension(video_file_path: str) -> str: + """ + Detect audio codec from video file and return codec name and appropriate file extension. 
+ """ + try: + _, ffprobe_path = get_ffmpeg_and_ffprobe_paths() + + cmd = [ + ffprobe_path, + "-v", "error", + "-select_streams", "a:0", + "-show_entries", "stream=codec_name", + "-of", "default=noprint_wrappers=1:nokey=1", + video_file_path + ] + + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0 or not result.stdout.strip(): + # Default to wav if detection fails + return "wav" + + codec_name = result.stdout.strip() + + codec_to_extension = { + "aac": "aac", + "mp3": "mp3", + "opus": "opus", + "vorbis": "ogg", + "flac": "flac", + "pcm_s16le": "wav", + "pcm_s24le": "wav", + "pcm_s32le": "wav", + "ac3": "ac3", + "eac3": "eac3", + } + + extension = codec_to_extension.get(codec_name, "wav") + return extension + + except Exception: + # Default to wav if detection fails + return "wav" + + +def separate_audio_video(video_with_audio_path: str, basename_without_ext: str, output_folder: str) -> tuple[str, str, str, str]: """ Separate audio and video from a video file. - Returns tuple of (audio_file_path, video_file_path) + Automatically detects the audio codec and uses the appropriate file extension. 
""" from modules.console_colors import ULTRASINGER_HEAD - # Get original video file extension - _, video_ext = os.path.splitext(video_with_audio_path) + # Get original video file extension without the dot + _, video_ext_with_dot = os.path.splitext(video_with_audio_path) + video_ext = video_ext_with_dot.lstrip('.') + + # Detect audio codec and get appropriate extension + audio_ext = get_audio_codec_and_extension(video_with_audio_path) print(f"{ULTRASINGER_HEAD} Extracting audio from video") - audio_file_path = os.path.join(output_folder, f"{basename_without_ext}.m4a") + audio_file_path = os.path.join(output_folder, f"{basename_without_ext}.{audio_ext}") extract_audio(video_with_audio_path, audio_file_path) print(f"{ULTRASINGER_HEAD} Creating video without audio") - video_only_path = os.path.join(output_folder, f"{basename_without_ext}_video{video_ext}") + video_only_path = os.path.join(output_folder, f"{basename_without_ext}_video.{video_ext}") remove_audio_from_video(video_with_audio_path, video_only_path) # Remove original video with audio os.remove(video_with_audio_path) # Rename video without audio to final name - final_video_path = os.path.join(output_folder, f"{basename_without_ext}{video_ext}") + final_video_path = os.path.join(output_folder, f"{basename_without_ext}.{video_ext}") os.rename(video_only_path, final_video_path) - return audio_file_path, final_video_path + return audio_file_path, final_video_path, audio_ext, video_ext