From 51f591a7e5f9e67b055093254e97dd0a7bf95fb8 Mon Sep 17 00:00:00 2001
From: Naman Thapliyal <namanthapliyal1999@gmai.com>
Date: Sat, 19 Jul 2025 03:34:50 +0530
Subject: [PATCH 1/9] Fastapi app

---
 Dockerfile     |  44 ++++++++++++++++++++
 fastapi_app.py | 106 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 150 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 fastapi_app.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..b4565a0f
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,44 @@
+# File: services/OpenVoice/Dockerfile
+# Usa l'immagine di base di Ubuntu
+FROM ubuntu:22.04
+
+# Aggiorna il sistema e installa le dipendenze necessarie
+RUN apt-get update && DEBIEN_FRONTEND=noninteractive apt-get install -y \
+    sudo \
+    python3.9 \
+    python3-distutils \
+    python3-pip \
+    ffmpeg \
+    git
+
+# Aggiorna pip
+RUN pip install --upgrade pip
+
+# Imposta il working directory nel container
+WORKDIR /app
+
+# Installa openai-whisper
+RUN git clone https://github.com/myshell-ai/OpenVoice openvoice
+
+# Install FastAPI and Uvicorn, and other dependencies
+RUN pip install uvicorn fastapi python-multipart langid faster-whisper whisper-timestamped unidecode eng-to-ipa pypinyin cn2an
+
+# Imposta il working directory nel container
+WORKDIR /app/openvoice
+
+RUN pip install -e .
+RUN pip install soundfile librosa inflect jieba silero
+
+RUN apt -y install -qq aria2 unzip
+RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/OpenVoice/resolve/main/checkpoints_1226.zip -d /app/openvoice -o checkpoints_1226.zip
+RUN unzip /app/openvoice/checkpoints_1226.zip 
+RUN mv /app/openvoice/checkpoints /app/openvoice/openvoice/checkpoints 
+RUN mv /app/openvoice/resources /app/openvoice/openvoice/resources 
+
+EXPOSE 7860
+
+# Set the working directory to the openvoice directory where fastapi_app.py will reside
+WORKDIR /app/openvoice/openvoice
+
+# Command to run the FastAPI application with Uvicorn
+CMD ["uvicorn", "fastapi_app:app", "--host", "0.0.0.0", "--port", "7860"]
diff --git a/fastapi_app.py b/fastapi_app.py
new file mode 100644
index 00000000..013333b0
--- /dev/null
+++ b/fastapi_app.py
@@ -0,0 +1,106 @@
+from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+from fastapi.responses import FileResponse
+import os
+import torch
+import langid
+from openvoice import se_extractor
+from openvoice.api import BaseSpeakerTTS, ToneColorConverter
+import shutil
+
+app = FastAPI()
+
+# Configuration from openvoice_app.py
+en_ckpt_base = 'checkpoints/base_speakers/EN'
+zh_ckpt_base = 'checkpoints/base_speakers/ZH'
+ckpt_converter = 'checkpoints/converter'
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+output_dir = 'outputs'
+os.makedirs(output_dir, exist_ok=True)
+
+# Load models
+en_base_speaker_tts = BaseSpeakerTTS(f'{en_ckpt_base}/config.json', device=device)
+en_base_speaker_tts.load_ckpt(f'{en_ckpt_base}/checkpoint.pth')
+zh_base_speaker_tts = BaseSpeakerTTS(f'{zh_ckpt_base}/config.json', device=device)
+zh_base_speaker_tts.load_ckpt(f'{zh_ckpt_base}/checkpoint.pth')
+tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
+tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
+
+# Load speaker embeddings
+en_source_default_se = torch.load(f'{en_ckpt_base}/en_default_se.pth').to(device)
+en_source_style_se = torch.load(f'{en_ckpt_base}/en_style_se.pth').to(device)
+zh_source_se = torch.load(f'{zh_ckpt_base}/zh_default_se.pth').to(device)
+
+supported_languages = ['zh', 'en']
+
+@app.post("/synthesize/")
+async def synthesize_speech(
+    prompt: str = Form(...),
+    style: str = Form(...),
+    audio_file: UploadFile = File(...),
+    agree: bool = Form(...)
+):
+    if not agree:
+        raise HTTPException(status_code=400, detail="Please accept the Terms & Condition!")
+
+    # Save the uploaded audio file temporarily
+    temp_audio_path = os.path.join(output_dir, audio_file.filename)
+    with open(temp_audio_path, "wb") as buffer:
+        shutil.copyfileobj(audio_file.file, buffer)
+
+    language_predicted = langid.classify(prompt)[0].strip()
+    print(f"Detected language: {language_predicted}")
+
+    if language_predicted not in supported_languages:
+        os.remove(temp_audio_path)
+        raise HTTPException(status_code=400, detail=f"The detected language {language_predicted} for your input text is not in our Supported Languages: {supported_languages}")
+
+    if language_predicted == "zh":
+        tts_model = zh_base_speaker_tts
+        source_se = zh_source_se
+        language = 'Chinese'
+        if style not in ['default']:
+            os.remove(temp_audio_path)
+            raise HTTPException(status_code=400, detail=f"The style {style} is not supported for Chinese, which should be in ['default']")
+    else:
+        tts_model = en_base_speaker_tts
+        if style == 'default':
+            source_se = en_source_default_se
+        else:
+            source_se = en_source_style_se
+        language = 'English'
+        if style not in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']:
+            os.remove(temp_audio_path)
+            raise HTTPException(status_code=400, detail=f"The style {style} is not supported for English, which should be in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']")
+
+    if len(prompt) < 2:
+        os.remove(temp_audio_path)
+        raise HTTPException(status_code=400, detail="Please give a longer prompt text")
+    if len(prompt) > 200:
+        os.remove(temp_audio_path)
+        raise HTTPException(status_code=400, detail="Text length limited to 200 characters for this demo, please try shorter text.")
+
+    try:
+        target_se, audio_name = se_extractor.get_se(temp_audio_path, tone_color_converter, target_dir='processed', vad=True)
+    except Exception as e:
+        os.remove(temp_audio_path)
+        raise HTTPException(status_code=500, detail=f"Get target tone color error: {str(e)}")
+
+    src_path = os.path.join(output_dir, 'tmp.wav')
+    tts_model.tts(prompt, src_path, speaker=style, language=language)
+
+    save_path = os.path.join(output_dir, 'output.wav')
+    encode_message = "@MyShell"
+    tone_color_converter.convert(
+        audio_src_path=src_path,
+        src_se=source_se,
+        tgt_se=target_se,
+        output_path=save_path,
+        message=encode_message
+    )
+
+    # Clean up temporary files
+    os.remove(temp_audio_path)
+    os.remove(src_path)
+
+    return FileResponse(save_path, media_type="audio/wav", filename="synthesized_audio.wav")
+

From 2b1f76c88f0fcaedfec3dd1608d0ef757a61ded6 Mon Sep 17 00:00:00 2001
From: Naman Thapliyal <namanthapliyal1999@gmai.com>
Date: Sat, 19 Jul 2025 03:36:18 +0530
Subject: [PATCH 2/9] Fastapi app comments added

---
 Dockerfile | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b4565a0f..80064f44 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,8 @@
 # File: services/OpenVoice/Dockerfile
-# Usa l'immagine di base di Ubuntu
+# Use Ubuntu base image
 FROM ubuntu:22.04
 
-# Aggiorna il sistema e installa le dipendenze necessarie
+# Update the system and install necessary dependencies
 RUN apt-get update && DEBIEN_FRONTEND=noninteractive apt-get install -y \
     sudo \
     python3.9 \
@@ -11,19 +11,19 @@ RUN apt-get update && DEBIEN_FRONTEND=noninteractive apt-get install -y \
     ffmpeg \
     git
 
-# Aggiorna pip
+# Upgrade pip
 RUN pip install --upgrade pip
 
-# Imposta il working directory nel container
+# Set the working directory in the container
 WORKDIR /app
 
-# Installa openai-whisper
+# Install openai-whisper
 RUN git clone https://github.com/myshell-ai/OpenVoice openvoice
 
 # Install FastAPI and Uvicorn, and other dependencies
 RUN pip install uvicorn fastapi python-multipart langid faster-whisper whisper-timestamped unidecode eng-to-ipa pypinyin cn2an
 
-# Imposta il working directory nel container
+# Set the working directory in the container
 WORKDIR /app/openvoice
 
 RUN pip install -e .

From 0a6bcd917c5f05289fea2ab991e4118a1bda6f96 Mon Sep 17 00:00:00 2001
From: Naman Thapliyal <namanthapliyal1999@gmai.com>
Date: Sat, 19 Jul 2025 09:40:32 +0530
Subject: [PATCH 3/9] FastApi app production level

---
 Dockerfile     | 4 ++--
 fastapi_app.py | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 80064f44..f2d250af 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 # File: services/OpenVoice/Dockerfile
 # Use Ubuntu base image
-FROM ubuntu:22.04
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
 
 # Update the system and install necessary dependencies
 RUN apt-get update && DEBIEN_FRONTEND=noninteractive apt-get install -y \
@@ -18,7 +18,7 @@ RUN pip install --upgrade pip
 WORKDIR /app
 
 # Install openai-whisper
-RUN git clone https://github.com/myshell-ai/OpenVoice openvoice
+RUN git clone https://github.com/namanthapliyal/OpenVoice.git openvoice
 
 # Install FastAPI and Uvicorn, and other dependencies
 RUN pip install uvicorn fastapi python-multipart langid faster-whisper whisper-timestamped unidecode eng-to-ipa pypinyin cn2an
diff --git a/fastapi_app.py b/fastapi_app.py
index 013333b0..a31d91d4 100644
--- a/fastapi_app.py
+++ b/fastapi_app.py
@@ -32,6 +32,10 @@
 
 supported_languages = ['zh', 'en']
 
+@app.get("/")
+async def root():
+    return {"message": "Welcome to the OpenVoice API! Server is up and running!"}
+
 @app.post("/synthesize/")
 async def synthesize_speech(
     prompt: str = Form(...),

From b583fb6ddfb2e9c6e71dd6c77659bfd664245cf5 Mon Sep 17 00:00:00 2001
From: Naman Thapliyal <namanthapliyal1999@gmai.com>
Date: Sat, 19 Jul 2025 10:02:18 +0530
Subject: [PATCH 4/9] Updated readme for usage

---
 docs/USAGE.md        |  7 +++--
 docs/docker_usage.md | 74 ++++++++++++++++++++++++++++++++++++++++++++
 fastapi_app.py       |  3 --
 3 files changed, 78 insertions(+), 6 deletions(-)
 create mode 100644 docs/docker_usage.md

diff --git a/docs/USAGE.md b/docs/USAGE.md
index ff051a83..861e0326 100644
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -4,8 +4,8 @@
 
 - [Quick Use](#quick-use): directly use OpenVoice without installation.
 - [Linux Install](#linux-install): for researchers and developers only.
-    - [V1](#openvoice-v1)
-    - [V2](#openvoice-v2)
+  - [V1](#openvoice-v1)
+  - [V2](#openvoice-v2)
 - [Install on Other Platforms](#install-on-other-platforms): unofficial installation guide contributed by the community
 
 ## Quick Use
@@ -63,6 +63,7 @@ Please see [`demo_part2.ipynb`](../demo_part2.ipynb) for an example for language
 Download the checkpoint from [here](https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip) and extract it to the `checkpoints_v2` folder.
 
 Install [MeloTTS](https://github.com/myshell-ai/MeloTTS):
+
 ```
 pip install git+https://github.com/myshell-ai/MeloTTS.git
 python -m unidic download
@@ -70,7 +71,6 @@ python -m unidic download
 
 **Demo Usage.** Please see [`demo_part3.ipynb`](../demo_part3.ipynb) for example usage of OpenVoice V2. Now it natively supports English, Spanish, French, Chinese, Japanese and Korean.
 
-
 ## Install on Other Platforms
 
 This section provides the unofficial installation guides by open-source contributors in the community:
@@ -79,5 +79,6 @@ This section provides the unofficial installation guides by open-source contribu
   - [Guide](https://github.com/Alienpups/OpenVoice/blob/main/docs/USAGE_WINDOWS.md) by [@Alienpups](https://github.com/Alienpups)
   - You are welcome to contribute if you have a better installation guide. We will list you here.
 - Docker
+  - [Guide] (https://github.com/namanthapliyal/OpenVoice/docs/docker_usage.md) by [@namanthapliyal](https://github.com/namanthapliyal/)
   - [Guide](https://github.com/StevenJSCF/OpenVoice/blob/update-docs/docs/DF_USAGE.md) by [@StevenJSCF](https://github.com/StevenJSCF)
   - You are welcome to contribute if you have a better installation guide. We will list you here.
diff --git a/docs/docker_usage.md b/docs/docker_usage.md
new file mode 100644
index 00000000..10d5813e
--- /dev/null
+++ b/docs/docker_usage.md
@@ -0,0 +1,74 @@
+## Local Development Setup
+
+Follow these steps to set up and run the application locally for development and debugging.
+
+### 1. Clone the Repository
+
+First, clone this repository to your local machine:
+
+```bash
+git clone https://github.com/namanthapliyal/OpenVoice.git
+cd ./OpenVoice
+```
+
+### 2. Build the Docker Image
+
+In the root location of the project, build the Docker image using the following command:
+
+```bash
+docker build -t openvoice-fastapi .
+```
+
+This command will:
+
+- Pull the nvidia/cuda base image.
+- Install necessary system dependencies and Python packages.
+- Clone the OpenVoice library.
+- Download pre-trained checkpoints required for voice synthesis.
+- Set up the working directory and expose the application port.
+- Tag the resulting image as `openvoice-fastapi`.
+
+This process may take some time, especially during the initial download of the base image and checkpoints.
+
+### 3. Run the Docker Container
+
+Once the image is built, you can run a container from it. To enable GPU acceleration and map the application's port to your host machine, use the following command:
+
+```bash
+docker run --gpus all -p 7860:7860 openvoice-fastapi
+
+```
+
+- `--gpus all`: Exposes all available NVIDIA GPUs on your host to the container. Ensure the NVIDIA Container Toolkit is correctly installed.
+- `-p 7860:7860`: Maps port 7860 inside the container (where FastAPI runs) to port 7860 on your host machine.
+
+The FastAPI application will now be accessible at http://localhost:7860.
+
+### 4. Interact with the API
+
+You can test the API using curl or any API client (like Postman, Insomnia, or your browser for GET requests). The primary endpoint is /synthesize/ which accepts POST requests with multipart/form-data.
+
+Example curl Request:
+
+```bash
+curl -X POST "http://localhost:7860/synthesize/" \
+  -H "accept: application/json" \
+  -H "Content-Type: multipart/form-data" \
+  -F "prompt=This is a test sentence for voice synthesis." \
+  -F "style=default" \
+  -F "audio_file=@/path/to/your/reference_audio.mp3" \
+  -F "agree=true" \
+  --output synthesized_audio.wav
+```
+
+Parameters:
+
+- prompt (string, required): The text to be synthesized.
+- style (string, required): The speaking style. Supported values: default, whispering, shouting, excited, cheerful, terrified, angry, sad, friendly. (Note: Chinese only supports default).
+- audio_file (file, required): An audio file (.mp3 or .wav) of the reference speaker whose voice you want to clone.
+- agree (boolean, optional): No longer read by the API (the terms-and-conditions check was removed from the endpoint); it may be omitted and is ignored if sent.
+
+The API will return the synthesized audio as a .wav file.
+
+Output Directory
+Synthesized audio files and temporary processing files will be stored in the outputs/ directory within the container. For local debugging, you might want to mount a volume to persist these outputs on your host machine.
diff --git a/fastapi_app.py b/fastapi_app.py
index a31d91d4..73c9ffc3 100644
--- a/fastapi_app.py
+++ b/fastapi_app.py
@@ -41,10 +41,7 @@ async def synthesize_speech(
     prompt: str = Form(...),
     style: str = Form(...),
     audio_file: UploadFile = File(...),
-    agree: bool = Form(...)
 ):
-    if not agree:
-        raise HTTPException(status_code=400, detail="Please accept the Terms & Condition!")
 
     # Save the uploaded audio file temporarily
     temp_audio_path = os.path.join(output_dir, audio_file.filename)

From ef6ab7fca31511e09027e033a755c29c0ae2546e Mon Sep 17 00:00:00 2001
From: Naman Thapliyal <namanthapliyal1999@gmai.com>
Date: Sat, 19 Jul 2025 10:03:35 +0530
Subject: [PATCH 5/9] Updated readme for usage

---
 docs/USAGE.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/USAGE.md b/docs/USAGE.md
index 861e0326..33debbe5 100644
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -79,6 +79,6 @@ This section provides the unofficial installation guides by open-source contribu
   - [Guide](https://github.com/Alienpups/OpenVoice/blob/main/docs/USAGE_WINDOWS.md) by [@Alienpups](https://github.com/Alienpups)
   - You are welcome to contribute if you have a better installation guide. We will list you here.
 - Docker
-  - [Guide] (https://github.com/namanthapliyal/OpenVoice/docs/docker_usage.md) by [@namanthapliyal](https://github.com/namanthapliyal/)
+  - [Guide](https://github.com/namanthapliyal/OpenVoice/blob/main/docs/docker_usage.md) by [@namanthapliyal](https://github.com/namanthapliyal/)
   - [Guide](https://github.com/StevenJSCF/OpenVoice/blob/update-docs/docs/DF_USAGE.md) by [@StevenJSCF](https://github.com/StevenJSCF)
   - You are welcome to contribute if you have a better installation guide. We will list you here.

From af9b9e63c97735098cbd20e889cf93330eb5c55a Mon Sep 17 00:00:00 2001
From: Naman Thapliyal <namanthapliyal1999@gmai.com>
Date: Sat, 19 Jul 2025 10:06:54 +0530
Subject: [PATCH 6/9] Updated readme for usage

---
 docs/docker_usage.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/docker_usage.md b/docs/docker_usage.md
index 10d5813e..c43962bf 100644
--- a/docs/docker_usage.md
+++ b/docs/docker_usage.md
@@ -72,3 +72,7 @@ The API will return the synthesized audio as a .wav file.
 
 Output Directory
 Synthesized audio files and temporary processing files will be stored in the outputs/ directory within the container. For local debugging, you might want to mount a volume to persist these outputs on your host machine.
+
+### 5. Access the Swagger Docs
+
+You can access the Swagger UI documentation by navigating to http://localhost:7860/docs in your web browser. This provides an interactive API reference and allows you to test the API endpoints directly through the UI.

From 8c04fa3980209aa124303dbbe2a667591418cce8 Mon Sep 17 00:00:00 2001
From: Naman Thapliyal <namanthapliyal1999@gmai.com>
Date: Sat, 19 Jul 2025 19:57:46 +0530
Subject: [PATCH 7/9] updated requirements

---
 requirements.txt | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 8ddba70d..8ffb0d89 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,3 +14,15 @@ cn2an==0.5.22
 jieba==0.42.1
 gradio==3.48.0
 langid==1.1.6
+# Core web server
+uvicorn
+fastapi
+python-multipart
+
+# Whisper and language processing
+eng-to-ipa
+
+
+# Audio and text processing
+soundfile
+silero

From b1929569bb0538afc8d4e584dda7870a9beb7316 Mon Sep 17 00:00:00 2001
From: Naman Thapliyal <namanthapliyal1999@gmai.com>
Date: Sat, 19 Jul 2025 20:20:54 +0530
Subject: [PATCH 8/9] updated requirements

---
 requirements.txt | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 8ffb0d89..b3cccb77 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+# OpenVoice core requirements, with pinned versions for compatibility
 librosa==0.9.1
 faster-whisper==0.9.0
 pydub==0.25.1
@@ -14,15 +15,9 @@ cn2an==0.5.22
 jieba==0.42.1
 gradio==3.48.0
 langid==1.1.6
-# Core web server
-uvicorn
+
+# Extra requirements for the FastAPI wrapper (fastapi_app.py)
 fastapi
+uvicorn
 python-multipart
 
-# Whisper and language processing
-eng-to-ipa
-
-
-# Audio and text processing
-soundfile
-silero

From 8b3571bda08b6d72b1b8cf3e62cfd2b841d7f038 Mon Sep 17 00:00:00 2001
From: Naman Thapliyal <namanthapliyal1999@gmai.com>
Date: Sat, 19 Jul 2025 20:57:43 +0530
Subject: [PATCH 9/9] optimized docker file

---
 Dockerfile | 47 +++++++++++++++++++++--------------------------
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index f2d250af..98585b80 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,44 +1,39 @@
-# File: services/OpenVoice/Dockerfile
-# Use Ubuntu base image
 FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
 
-# Update the system and install necessary dependencies
-RUN apt-get update && DEBIEN_FRONTEND=noninteractive apt-get install -y \
-    sudo \
-    python3.9 \
-    python3-distutils \
+# Install Python 3.10 and pip, as well as other dependencies
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    python3.10 \
+    python3.10-distutils \
     python3-pip \
+    sudo \
     ffmpeg \
-    git
+    git \
+    aria2 \
+    unzip && \
+    rm -rf /var/lib/apt/lists/*
 
-# Upgrade pip
-RUN pip install --upgrade pip
+# Optional: ensure python3 points to python3.10
+RUN ln -sf /usr/bin/python3.10 /usr/bin/python3
 
-# Set the working directory in the container
 WORKDIR /app
 
-# Install openai-whisper
+# Clone OpenVoice (or use COPY for local code)
 RUN git clone https://github.com/namanthapliyal/OpenVoice.git openvoice
 
-# Install FastAPI and Uvicorn, and other dependencies
-RUN pip install uvicorn fastapi python-multipart langid faster-whisper whisper-timestamped unidecode eng-to-ipa pypinyin cn2an
-
-# Set the working directory in the container
 WORKDIR /app/openvoice
 
-RUN pip install -e .
-RUN pip install soundfile librosa inflect jieba silero
+# Install Python dependencies
+RUN python3 -m pip install --upgrade pip && \
+    python3 -m pip install --no-cache-dir -r requirements.txt && \
+    python3 -m pip install --no-cache-dir -e .
 
-RUN apt -y install -qq aria2 unzip
-RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/OpenVoice/resolve/main/checkpoints_1226.zip -d /app/openvoice -o checkpoints_1226.zip
-RUN unzip /app/openvoice/checkpoints_1226.zip 
-RUN mv /app/openvoice/checkpoints /app/openvoice/openvoice/checkpoints 
-RUN mv /app/openvoice/resources /app/openvoice/openvoice/resources 
+# Download and place checkpoints/resources
+RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/camenduru/OpenVoice/resolve/main/checkpoints_1226.zip -d /app/openvoice -o checkpoints_1226.zip && \
+    unzip /app/openvoice/checkpoints_1226.zip && \
+    rm checkpoints_1226.zip
 
 EXPOSE 7860
 
-# Set the working directory to the openvoice directory where fastapi_app.py will reside
-WORKDIR /app/openvoice/openvoice
 
-# Command to run the FastAPI application with Uvicorn
 CMD ["uvicorn", "fastapi_app:app", "--host", "0.0.0.0", "--port", "7860"]