Skip to content

Commit 4b039b0

Browse files
authored
Merge pull request #4 from fumiama/main
增加colab笔记本以方便使用
2 parents 2e3dc22 + 10b74d7 commit 4b039b0

30 files changed

+342
-85
lines changed

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
.DS_Store
2+
__pycache__
3+
/TEMP
4+
*.pyd
5+
hubert_base.pt
6+
/logs

README.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
# Retrieval-based-Voice-Conversion-WebUI
22

3+
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
4+
35
缺失的2个文件夹和2个文件:
46

57
hubert_base.pt
Retrieval_based_Voice_Conversion_WebUI.ipynb

Lines changed: 211 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,211 @@
1+
{
2+
"cells": [
3+
{
4+
"attachments": {},
5+
"cell_type": "markdown",
6+
"metadata": {},
7+
"source": [
8+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": null,
14+
"metadata": {
15+
"id": "GmFP6bN9dvOq"
16+
},
17+
"outputs": [],
18+
"source": [
19+
"#@title 查看显卡\n",
20+
"!nvidia-smi"
21+
]
22+
},
23+
{
24+
"cell_type": "markdown",
25+
"metadata": {},
26+
"source": []
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": null,
31+
"metadata": {
32+
"id": "wjddIFr1oS3W"
33+
},
34+
"outputs": [],
35+
"source": [
36+
"#@title 安装依赖\n",
37+
"!apt-get -y install build-essential python3-dev ffmpeg\n",
38+
"!pip3 install --upgrade setuptools wheel\n",
39+
"!pip3 install --upgrade pip\n",
40+
"!pip3 install faiss-gpu fairseq gradio ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": null,
46+
"metadata": {
47+
"id": "ge_97mfpgqTm"
48+
},
49+
"outputs": [],
50+
"source": [
51+
"#@title 克隆仓库\n",
52+
"\n",
53+
"!git clone --depth=1 https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
54+
"%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
55+
"!mkdir -p pretrained uvr5_weights"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": null,
61+
"metadata": {
62+
"id": "BLDEZADkvlw1"
63+
},
64+
"outputs": [],
65+
"source": [
66+
"#@title 更新仓库(一般无需执行)\n",
67+
"!git pull"
68+
]
69+
},
70+
{
71+
"cell_type": "code",
72+
"execution_count": null,
73+
"metadata": {
74+
"id": "UG3XpUwEomUz"
75+
},
76+
"outputs": [],
77+
"source": [
78+
"!apt -y install -qq aria2\n",
79+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n",
80+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n",
81+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n",
82+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n",
83+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n",
84+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n",
85+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n",
86+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n",
87+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n",
88+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n",
89+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n",
90+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n",
91+
"\n",
92+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
93+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth\n",
94+
"\n",
95+
"!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
96+
]
97+
},
98+
{
99+
"cell_type": "code",
100+
"execution_count": null,
101+
"metadata": {
102+
"id": "Mwk7Q0Loqzjx"
103+
},
104+
"outputs": [],
105+
"source": [
106+
"#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n",
107+
"\n",
108+
"#@markdown 数据集位置\n",
109+
"DATASET = \"/content/drive/MyDrive/dataset/lulu20230327.zip\" #@param {type:\"string\"}\n",
110+
"\n",
111+
"from google.colab import drive\n",
112+
"drive.mount('/content/drive')\n",
113+
"!mkdir -p /content/dataset\n",
114+
"!unzip -d /content/dataset {DATASET}"
115+
]
116+
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": null,
120+
"metadata": {
121+
"id": "7vh6vphDwO0b"
122+
},
123+
"outputs": [],
124+
"source": [
125+
"#@title 启动web\n",
126+
"%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
127+
"!python3 infer-web.py --colab --pycmd python3"
128+
]
129+
},
130+
{
131+
"cell_type": "code",
132+
"execution_count": null,
133+
"metadata": {
134+
"id": "FgJuNeAwx5Y_"
135+
},
136+
"outputs": [],
137+
"source": [
138+
"#@title 手动将训练后的模型文件备份到谷歌云盘\n",
139+
"#@markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n",
140+
"\n",
141+
"#@markdown 模型名\n",
142+
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
143+
"#@markdown 模型epoch\n",
144+
"MODELEPOCH = 3540 #@param {type:\"integer\"}\n",
145+
"\n",
146+
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
147+
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
148+
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n",
149+
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n",
150+
"\n",
151+
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth"
152+
]
153+
},
154+
{
155+
"cell_type": "code",
156+
"execution_count": null,
157+
"metadata": {
158+
"id": "OVQoLQJXS7WX"
159+
},
160+
"outputs": [],
161+
"source": [
162+
"#@title 从谷歌云盘恢复pth\n",
163+
"#@markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n",
164+
"\n",
165+
"#@markdown 模型名\n",
166+
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
167+
"#@markdown 模型epoch\n",
168+
"MODELEPOCH = 730 #@param {type:\"integer\"}\n",
169+
"\n",
170+
"!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
171+
"!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth"
172+
]
173+
},
174+
{
175+
"cell_type": "code",
176+
"execution_count": null,
177+
"metadata": {
178+
"id": "ZKAyuKb9J6dz"
179+
},
180+
"outputs": [],
181+
"source": [
182+
"#@title 手动训练(不推荐)\n",
183+
"#@markdown 模型名\n",
184+
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
185+
"\n",
186+
"!python3 trainset_preprocess_pipeline_print.py /content/dataset 32000 8 logs/{MODELNAME} True\n",
187+
"\n",
188+
"!python3 extract_feature_print.py 1 0 0 logs/{MODELNAME}\n",
189+
"\n",
190+
"!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr 32k -f0 0 -g 0 -bs 4 -te 10 -se 5 -pg pretrained/G32k.pth -pd pretrained/D32k.pth -l 0 -c 0\n"
191+
]
192+
}
193+
],
194+
"metadata": {
195+
"accelerator": "GPU",
196+
"colab": {
197+
"private_outputs": true,
198+
"provenance": []
199+
},
200+
"gpuClass": "standard",
201+
"kernelspec": {
202+
"display_name": "Python 3",
203+
"name": "python3"
204+
},
205+
"language_info": {
206+
"name": "python"
207+
}
208+
},
209+
"nbformat": 4,
210+
"nbformat_minor": 0
211+
}

config.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,10 @@
1+
import argparse
2+
parser = argparse.ArgumentParser()
3+
parser.add_argument("--port", type=int, default=7865, help="Listen port")
4+
parser.add_argument("--pycmd", type=str, default="python", help="Python command")
5+
parser.add_argument("--colab", action='store_true', help="Launch in colab")
6+
parser.add_argument("--noparallel", action='store_true', help="Disable parallel processing")
7+
cmd_opts = parser.parse_args()
18
############离线VC参数
29
inp_root=r"白鹭霜华长条"#对输入目录下所有音频进行转换,别放非音频文件
310
opt_root=r"opt"#输出目录
@@ -7,10 +14,15 @@
714
device = "cuda:0"#填写cuda:x或cpu,x指代第几张卡,只支持N卡加速
815
is_half=True#9-10-20-30-40系显卡无脑True,不影响质量,>=20显卡开启有加速
916
n_cpu=0#默认0用上所有线程,写数字限制CPU资源使用
17+
############python命令路径
18+
python_cmd=cmd_opts.pycmd
19+
listen_port=cmd_opts.port
20+
iscolab=cmd_opts.colab
21+
noparallel=cmd_opts.noparallel
1022
############下头别动
1123
import torch
1224
if(torch.cuda.is_available()==False):
13-
print("没有发现支持的N卡使用CPU进行推理")
25+
print("没有发现支持的N卡, 使用CPU进行推理")
1426
device="cpu"
1527
is_half=False
1628
if(device!="cpu"):

extract_feature_print.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,20 @@
11
import os,sys,traceback
2-
n_part=int(sys.argv[1])
3-
i_part=int(sys.argv[2])
4-
i_gpu=sys.argv[3]
5-
exp_dir=sys.argv[4]
6-
os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
2+
if len(sys.argv) == 4:
3+
n_part=int(sys.argv[1])
4+
i_part=int(sys.argv[2])
5+
exp_dir=sys.argv[3]
6+
else:
7+
n_part=int(sys.argv[1])
8+
i_part=int(sys.argv[2])
9+
i_gpu=sys.argv[3]
10+
exp_dir=sys.argv[4]
11+
os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
712

813
import torch
914
import torch.nn.functional as F
1015
import soundfile as sf
1116
import numpy as np
12-
import joblib
1317
from fairseq import checkpoint_utils
14-
import pdb
1518
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1619

1720
f = open("%s/extract_f0_feature.log"%exp_dir, "a+")
@@ -48,7 +51,8 @@ def readwave(wav_path, normalize=False):
4851
)
4952
model = models[0]
5053
model = model.to(device)
51-
model = model.half()
54+
if torch.cuda.is_available():
55+
model = model.half()
5256
model.eval()
5357

5458
todo=sorted(list(os.listdir(wavPath)))[i_part::n_part]
@@ -67,7 +71,7 @@ def readwave(wav_path, normalize=False):
6771
feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
6872
padding_mask = torch.BoolTensor(feats.shape).fill_(False)
6973
inputs = {
70-
"source": feats.half().to(device),
74+
"source": feats.half().to(device) if torch.cuda.is_available() else feats.to(device),
7175
"padding_mask": padding_mask.to(device),
7276
"output_layer": 9, # layer 9
7377
}

0 commit comments

Comments
 (0)