Skip to content
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@ openai-whisper==20231117
onnxruntime==1.18.1
inflect==7.3.1
unidecode==1.3.8
# NOTE: 这个似乎不需要
# NOTE: 这个需要,否则无法启动CosyVoice服务
# matcha-tts
matcha-tts==0.0.7.2


# whisper
faster_whisper==1.0.3
Expand Down
41 changes: 41 additions & 0 deletions scripts/dl_cosyvoice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import logging

from scripts.dl_base import BaseModelDownloader

logger = logging.getLogger(__name__)


class CosyVoiceDownloader(BaseModelDownloader):
    """Downloader configuration for the ``CosyVoice2-0.5B`` model.

    Declares the ModelScope and Hugging Face repositories together with the
    list of required files, and forwards them to ``BaseModelDownloader``.
    """

    def __init__(self):
        # Files that live at the root of the model repository.
        root_files = [
            "campplus.onnx",
            "configuration.json",
            "cosyvoice.yaml",
            "flow.pt",
            "hift.pt",
            "llm.pt",
            "speech_tokenizer_v2.onnx",
        ]
        # Files that live under the ``CosyVoice-BlankEN`` sub-directory.
        blank_en_files = [
            "CosyVoice-BlankEN/model.safetensors",
            "CosyVoice-BlankEN/config.json",
            "CosyVoice-BlankEN/generation_config.json",
            "CosyVoice-BlankEN/merges.txt",
            "CosyVoice-BlankEN/tokenizer_config.json",
            "CosyVoice-BlankEN/vocab.json",
        ]
        super().__init__(
            model_name="CosyVoice2-0.5B",
            # NOTE: the "iic/CosyVoice2-0.5B" ModelScope repo is deliberately
            # not used: per the original author's note, its file names do not
            # match the ones in the Hugging Face repo.
            modelscope_repo="aiwantaozi/CosyVoice2-0.5B",
            huggingface_repo="FunAudioLLM/CosyVoice2-0.5B",
            required_files=root_files + blank_en_files,
            just_download_required_files=True,
        )
        # Expose this module's logger on the instance.
        self.logger = logger


if __name__ == "__main__":
    # Imported here because the CLI parser is only needed in script mode.
    from scripts.dl_args import parser_args

    cli_args = parser_args()
    downloader = CosyVoiceDownloader()
    # The downloader instance is callable; ``source`` comes from the CLI.
    downloader(source=cli_args.source)
7 changes: 3 additions & 4 deletions scripts/dl_cosyvoice_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
logger = logging.getLogger(__name__)


class CosyVoiceInstructDownloader(BaseModelDownloader):
class CosyVoiceBaseDownloader(BaseModelDownloader):
def __init__(self):
required_files = [
"campplus.onnx",
Expand All @@ -14,8 +14,7 @@ def __init__(self):
"flow.pt",
"hift.pt",
"llm.pt",
"speech_tokenizer_v1.onnx",
"spk2info.pt",
"speech_tokenizer_v1.onnx"
]
super().__init__(
model_name="CosyVoice_300M",
Expand All @@ -30,4 +29,4 @@ def __init__(self):
from scripts.dl_args import parser_args

args = parser_args()
CosyVoiceInstructDownloader()(source=args.source)
CosyVoiceBaseDownloader()(source=args.source)
27 changes: 27 additions & 0 deletions scripts/dl_f5_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import logging

from scripts.dl_base import BaseModelDownloader

logger = logging.getLogger(__name__)


class F5TTSDownloader(BaseModelDownloader):
    """Downloader configuration for the ``F5-TTS`` model.

    Passes the repository identifiers and the single required checkpoint
    file to ``BaseModelDownloader``.
    """

    def __init__(self):
        super().__init__(
            model_name="F5-TTS",
            modelscope_repo="AI-ModelScope/F5-TTS",
            huggingface_repo="SWivid/F5-TTS",
            required_files=[
                "F5TTS_Base/model_1200000.safetensors",
            ],
        )

        # Expose this module's logger on the instance.
        self.logger = logger


if __name__ == "__main__":
    # Imported here because the CLI parser is only needed in script mode.
    from scripts.dl_args import parser_args

    cli_args = parser_args()
    downloader = F5TTSDownloader()
    # The downloader instance is callable; ``source`` comes from the CLI.
    downloader(source=cli_args.source)
31 changes: 31 additions & 0 deletions scripts/dl_faster_whisper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import logging

from scripts.dl_base import BaseModelDownloader

logger = logging.getLogger(__name__)


class FasterWhisperDownloader(BaseModelDownloader):
    """Downloader configuration for ``faster-whisper-large-v3``.

    Passes the repository identifiers and the list of required files to
    ``BaseModelDownloader``.
    """

    def __init__(self):
        super().__init__(
            model_name="faster-whisper-large-v3",
            modelscope_repo="keepitsimple/faster-whisper-large-v3",
            huggingface_repo="Systran/faster-whisper-large-v3",
            required_files=[
                "model.bin",
                "tokenizer.json",
                "vocabulary.json",
                "preprocessor_config.json",
                "config.json",
            ],
        )

        # Expose this module's logger on the instance.
        self.logger = logger


if __name__ == "__main__":
    # Imported here because the CLI parser is only needed in script mode.
    from scripts.dl_args import parser_args

    cli_args = parser_args()
    downloader = FasterWhisperDownloader()
    # The downloader instance is callable; ``source`` comes from the CLI.
    downloader(source=cli_args.source)
29 changes: 29 additions & 0 deletions scripts/dl_fire_red_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import logging

from scripts.dl_base import BaseModelDownloader

logger = logging.getLogger(__name__)


class FireRedTTSDownloader(BaseModelDownloader):
    """Downloader configuration for the ``FireRedTTS`` model.

    Passes the repository identifiers and the list of required files to
    ``BaseModelDownloader``.
    """

    def __init__(self):
        super().__init__(
            model_name="FireRedTTS",
            modelscope_repo="pengzhendong/FireRedTTS",
            huggingface_repo="fireredteam/FireRedTTS",
            required_files=[
                "gpt.pt",
                "speaker.bin",
                "token2wav.pt",
            ],
        )

        # Expose this module's logger on the instance.
        self.logger = logger


if __name__ == "__main__":
    # Imported here because the CLI parser is only needed in script mode.
    from scripts.dl_args import parser_args

    cli_args = parser_args()
    downloader = FireRedTTSDownloader()
    # The downloader instance is callable; ``source`` comes from the CLI.
    downloader(source=cli_args.source)
32 changes: 32 additions & 0 deletions scripts/dl_fish_speech_1_2sft.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import logging

from scripts.dl_base import BaseModelDownloader

logger = logging.getLogger(__name__)


class FishSpeechDownloader(BaseModelDownloader):
    """Downloader configuration for the ``fish-speech-1.2-sft`` model.

    Passes the repository identifiers and the list of required files to
    ``BaseModelDownloader``.
    """

    def __init__(self):
        super().__init__(
            model_name="fish-speech-1.2-sft",
            modelscope_repo="fishaudio/fish-speech-1.2-sft",
            huggingface_repo="fishaudio/fish-speech-1.2-sft",
            required_files=[
                "config.json",
                "firefly-gan-vq-fsq-4x1024-42hz-generator.pth",
                "model.pth",
                "special_tokens_map.json",
                "tokenizer.json",
                "tokenizer_config.json",
            ],
        )

        # Expose this module's logger on the instance.
        self.logger = logger


if __name__ == "__main__":
    # Imported here because the CLI parser is only needed in script mode.
    from scripts.dl_args import parser_args

    cli_args = parser_args()
    downloader = FishSpeechDownloader()
    # The downloader instance is callable; ``source`` comes from the CLI.
    downloader(source=cli_args.source)
32 changes: 32 additions & 0 deletions scripts/dl_fish_speech_1_4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import logging

from scripts.dl_base import BaseModelDownloader

logger = logging.getLogger(__name__)


class FishSpeech14Downloader(BaseModelDownloader):
    """Downloader configuration for the fish-speech 1.4 model.

    Passes the repository identifiers and the list of required files to
    ``BaseModelDownloader``. The model is registered under the name
    ``fish-speech-1_4``.
    """

    def __init__(self):
        super().__init__(
            model_name="fish-speech-1_4",
            modelscope_repo="AI-ModelScope/fish-speech-1.4",
            huggingface_repo="fishaudio/fish-speech-1.4",
            required_files=[
                "config.json",
                "firefly-gan-vq-fsq-8x1024-21hz-generator.pth",
                "model.pth",
                "special_tokens_map.json",
                "tokenizer.json",
                "tokenizer_config.json",
            ],
        )

        # Expose this module's logger on the instance.
        self.logger = logger


if __name__ == "__main__":
    # Imported here because the CLI parser is only needed in script mode.
    from scripts.dl_args import parser_args

    cli_args = parser_args()
    downloader = FishSpeech14Downloader()
    # The downloader instance is callable; ``source`` comes from the CLI.
    downloader(source=cli_args.source)
34 changes: 34 additions & 0 deletions scripts/dl_index_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import logging

from scripts.dl_base import BaseModelDownloader

logger = logging.getLogger(__name__)


class IndexTTSDownloader(BaseModelDownloader):
    """Downloader configuration for the ``Index-TTS`` model.

    Passes the repository identifiers and the list of required files to
    ``BaseModelDownloader``, with ``just_download_required_files`` enabled.
    """

    def __init__(self):
        super().__init__(
            model_name="Index-TTS",
            modelscope_repo="IndexTeam/Index-TTS",
            huggingface_repo="IndexTeam/Index-TTS",
            required_files=[
                "bigvgan_discriminator.pth",
                "bigvgan_generator.pth",
                "bpe.model",
                "config.yaml",
                "dvae.pth",
                "gpt.pth",
                "unigram_12000.vocab",
            ],
            just_download_required_files=True,
        )

        # Expose this module's logger on the instance.
        self.logger = logger


if __name__ == "__main__":
    # Imported here because the CLI parser is only needed in script mode.
    from scripts.dl_args import parser_args

    cli_args = parser_args()
    downloader = IndexTTSDownloader()
    # The downloader instance is callable; ``source`` comes from the CLI.
    downloader(source=cli_args.source)
28 changes: 28 additions & 0 deletions scripts/dl_open_voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import logging

from scripts.dl_base import BaseModelDownloader

logger = logging.getLogger(__name__)


class OpenVoiceDownloader(BaseModelDownloader):
    """Downloader configuration for the ``OpenVoiceV2`` model.

    Passes the repository identifiers and the required ``converter/`` files
    to ``BaseModelDownloader``.
    """

    def __init__(self):
        super().__init__(
            model_name="OpenVoiceV2",
            modelscope_repo="myshell-ai/OpenVoiceV2",
            huggingface_repo="myshell-ai/OpenVoiceV2",
            required_files=[
                "converter/checkpoint.pth",
                "converter/config.json",
            ],
        )

        # Expose this module's logger on the instance.
        self.logger = logger


if __name__ == "__main__":
    # Imported here because the CLI parser is only needed in script mode.
    from scripts.dl_args import parser_args

    cli_args = parser_args()
    downloader = OpenVoiceDownloader()
    # The downloader instance is callable; ``source`` comes from the CLI.
    downloader(source=cli_args.source)
28 changes: 28 additions & 0 deletions scripts/dl_vocos_mel_24khz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import logging

from scripts.dl_base import BaseModelDownloader

logger = logging.getLogger(__name__)


class VocosMel24khzDownloader(BaseModelDownloader):
    """Downloader configuration for the ``vocos-mel-24khz`` model.

    Passes the repository identifiers and the list of required files to
    ``BaseModelDownloader``.
    """

    def __init__(self):
        super().__init__(
            model_name="vocos-mel-24khz",
            modelscope_repo="pengzhendong/vocos-mel-24khz",
            huggingface_repo="charactr/vocos-mel-24khz",
            required_files=[
                "config.yaml",
                "pytorch_model.bin",
            ],
        )

        # Expose this module's logger on the instance.
        self.logger = logger


if __name__ == "__main__":
    # Imported here because the CLI parser is only needed in script mode.
    from scripts.dl_args import parser_args

    cli_args = parser_args()
    downloader = VocosMel24khzDownloader()
    # The downloader instance is callable; ``source`` comes from the CLI.
    downloader(source=cli_args.source)
22 changes: 22 additions & 0 deletions scripts/download_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@
from scripts.dl_args import parser_args
from scripts.dl_chattts import ChatTTSDownloader
from scripts.dl_enhance import ResembleEnhanceDownloader
from scripts.dl_cosyvoice import CosyVoiceDownloader
from scripts.dl_cosyvoice_base import CosyVoiceBaseDownloader
from scripts.dl_cosyvoice_instruct import CosyVoiceInstructDownloader
from scripts.dl_f5_tts import F5TTSDownloader
from scripts.dl_faster_whisper import FasterWhisperDownloader
from scripts.dl_fire_red_tts import FireRedTTSDownloader
from scripts.dl_fish_speech_1_2sft import FishSpeechDownloader
from scripts.dl_fish_speech_1_4 import FishSpeech14Downloader
from scripts.dl_open_voice import OpenVoiceDownloader
from scripts.dl_vocos_mel_24khz import VocosMel24khzDownloader
from scripts.dl_index_tts import IndexTTSDownloader

from scripts.ModelDownloader import ModelDownloader


Expand All @@ -17,6 +29,16 @@ def main():
downloaders: list[ModelDownloader] = []
downloaders.append(ChatTTSDownloader())
downloaders.append(ResembleEnhanceDownloader())
downloaders.append(CosyVoiceBaseDownloader())
downloaders.append(CosyVoiceInstructDownloader())
downloaders.append(F5TTSDownloader())
downloaders.append(FasterWhisperDownloader())
downloaders.append(FireRedTTSDownloader())
downloaders.append(FishSpeechDownloader())
downloaders.append(FishSpeech14Downloader())
downloaders.append(OpenVoiceDownloader())
downloaders.append(VocosMel24khzDownloader())
downloaders.append(IndexTTSDownloader())

for downloader in downloaders:
downloader(source=args.source)
Expand Down
6 changes: 3 additions & 3 deletions scripts/downloader/fire_red_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
class FireRedTTSDownloader(BaseModelDownloader):
def __init__(self):
required_files = [
"fireredtts_gpt.pt",
"fireredtts_speaker.bin",
"fireredtts_token2wav.pt",
"gpt.pt",
"speaker.bin",
"token2wav.pt",
]
super().__init__(
model_name="FireRedTTS",
Expand Down
4 changes: 2 additions & 2 deletions scripts/downloader/index_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
logger = logging.getLogger(__name__)


class CosyVoice2Downloader(BaseModelDownloader):
class IndexTTSDownloader(BaseModelDownloader):
def __init__(self):
required_files = [
"bigvgan_discriminator.pth",
Expand Down Expand Up @@ -33,4 +33,4 @@ def __init__(self):
from scripts.dl_args import parser_args

args = parser_args()
CosyVoice2Downloader()(source=args.source)
IndexTTSDownloader()(source=args.source)