RVC-Project
diff --git a/‎.gitignore
Lines changed: 6 additions & 0 deletions b/‎.gitignore
Lines changed: 6 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 2 additions & 0 deletions b/‎README.md
Lines changed: 2 additions & 0 deletions
diff --git a/‎Retrieval_based_Voice_Conversion_WebUI.ipynb
Lines changed: 211 additions & 0 deletions b/‎Retrieval_based_Voice_Conversion_WebUI.ipynb
Lines changed: 211 additions & 0 deletions
diff --git a/‎config.py
Lines changed: 13 additions & 1 deletion b/‎config.py
Lines changed: 13 additions & 1 deletion
diff --git a/‎extract_feature_print.py
Lines changed: 13 additions & 9 deletions b/‎extract_feature_print.py
Lines changed: 13 additions & 9 deletions
@@ -0,0 +1,6 @@
+.DS_Store
+__pycache__
+/TEMP
+*.pyd
+hubert_base.pt
+/logs
@@ -1,5 +1,7 @@
 # Retrieval-based-Voice-Conversion-WebUI
 
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
+
 缺失的2个文件夹和2个文件：
 
 hubert_base.pt
 
@@ -0,0 +1,211 @@
+{
+  "cells": [
+    {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "GmFP6bN9dvOq"
+      },
+      "outputs": [],
+      "source": [
+        "#@title 查看显卡\n",
+        "!nvidia-smi"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "wjddIFr1oS3W"
+      },
+      "outputs": [],
+      "source": [
+        "#@title 安装依赖\n",
+        "!apt-get -y install build-essential python3-dev ffmpeg\n",
+        "!pip3 install --upgrade setuptools wheel\n",
+        "!pip3 install --upgrade pip\n",
+        "!pip3 install faiss-gpu fairseq gradio ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "ge_97mfpgqTm"
+      },
+      "outputs": [],
+      "source": [
+        "#@title 克隆仓库\n",
+        "\n",
+        "!git clone --depth=1 https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
+        "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
+        "!mkdir -p pretrained uvr5_weights"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "BLDEZADkvlw1"
+      },
+      "outputs": [],
+      "source": [
+        "#@title 更新仓库（一般无需执行）\n",
+        "!git pull"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "UG3XpUwEomUz"
+      },
+      "outputs": [],
+      "source": [
+        "!apt -y install -qq aria2\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n",
+        "\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth\n",
+        "\n",
+        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Mwk7Q0Loqzjx"
+      },
+      "outputs": [],
+      "source": [
+        "#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n",
+        "\n",
+        "#@markdown 数据集位置\n",
+        "DATASET = \"/content/drive/MyDrive/dataset/lulu20230327.zip\"  #@param {type:\"string\"}\n",
+        "\n",
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')\n",
+        "!mkdir -p /content/dataset\n",
+        "!unzip -d /content/dataset {DATASET}"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "7vh6vphDwO0b"
+      },
+      "outputs": [],
+      "source": [
+        "#@title 启动web\n",
+        "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
+        "!python3 infer-web.py --colab --pycmd python3"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "FgJuNeAwx5Y_"
+      },
+      "outputs": [],
+      "source": [
+        "#@title 手动将训练后的模型文件备份到谷歌云盘\n",
+        "#@markdown 需要自己查看logs文件夹下模型的文件名，手动修改下方命令末尾的文件名\n",
+        "\n",
+        "#@markdown 模型名\n",
+        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
+        "#@markdown 模型epoch\n",
+        "MODELEPOCH = 3540  #@param {type:\"integer\"}\n",
+        "\n",
+        "# Keep each checkpoint's G_/D_ prefix in its Drive file name so the backups are not mislabeled.\n",
+        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
+        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
+        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n",
+        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n",
+        "\n",
+        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "OVQoLQJXS7WX"
+      },
+      "outputs": [],
+      "source": [
+        "#@title 从谷歌云盘恢复pth\n",
+        "#@markdown 需要自己查看logs文件夹下模型的文件名，手动修改下方命令末尾的文件名\n",
+        "\n",
+        "#@markdown 模型名\n",
+        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
+        "#@markdown 模型epoch\n",
+        "MODELEPOCH = 730  #@param {type:\"integer\"}\n",
+        "\n",
+        "# The experiment folder may not exist yet in a fresh runtime, so create it before copying.\n",
+        "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
+        "# Mirror of the backup cell: G_ is restored from the _G_ file and D_ from the _D_ file.\n",
+        "!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
+        "!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "ZKAyuKb9J6dz"
+      },
+      "outputs": [],
+      "source": [
+        "#@title 手动训练（不推荐）\n",
+        "#@markdown 模型名\n",
+        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
+        "\n",
+        "!python3 trainset_preprocess_pipeline_print.py /content/dataset 32000 8 logs/{MODELNAME} True\n",
+        "\n",
+        "!python3 extract_feature_print.py 1 0 0 logs/{MODELNAME}\n",
+        "\n",
+        "# Pass the chosen model name as the experiment (-e) instead of a fixed value,\n",
+        "# so training follows MODELNAME like the two steps above.\n",
+        "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e {MODELNAME} -sr 32k -f0 0 -g 0 -bs 4 -te 10 -se 5 -pg pretrained/G32k.pth -pd pretrained/D32k.pth -l 0 -c 0\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "private_outputs": true,
+      "provenance": []
+    },
+    "gpuClass": "standard",
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
@@ -1,3 +1,10 @@
+import argparse
+parser = argparse.ArgumentParser()
+parser.add_argument("--port", type=int, default=7865, help="Listen port")
+parser.add_argument("--pycmd", type=str, default="python", help="Python command")
+parser.add_argument("--colab", action='store_true', help="Launch in colab")
+parser.add_argument("--noparallel", action='store_true', help="Disable parallel processing")
+cmd_opts = parser.parse_args()
 ############离线VC参数
 inp_root=r"白鹭霜华长条"#对输入目录下所有音频进行转换，别放非音频文件
 opt_root=r"opt"#输出目录
@@ -7,10 +14,15 @@
 device = "cuda:0"#填写cuda:x或cpu，x指代第几张卡，只支持N卡加速
 is_half=True#9-10-20-30-40系显卡无脑True，不影响质量，>=20显卡开启有加速
 n_cpu=0#默认0用上所有线程，写数字限制CPU资源使用
+############python命令路径
+python_cmd=cmd_opts.pycmd
+listen_port=cmd_opts.port
+iscolab=cmd_opts.colab
+noparallel=cmd_opts.noparallel
 ############下头别动
 import torch
 if(torch.cuda.is_available()==False):
-    print("没有发现支持的N卡，使用CPU进行推理")
+    print("没有发现支持的N卡, 使用CPU进行推理")
     device="cpu"
     is_half=False
 if(device!="cpu"):
 
@@ -1,17 +1,20 @@
 import os,sys,traceback
-n_part=int(sys.argv[1])
-i_part=int(sys.argv[2])
-i_gpu=sys.argv[3]
-exp_dir=sys.argv[4]
-os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
+if len(sys.argv) == 4:
+    n_part=int(sys.argv[1])
+    i_part=int(sys.argv[2])
+    exp_dir=sys.argv[3]
+else:
+    n_part=int(sys.argv[1])
+    i_part=int(sys.argv[2])
+    i_gpu=sys.argv[3]
+    exp_dir=sys.argv[4]
+    os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
 
 import torch
 import torch.nn.functional as F
 import soundfile as sf
 import numpy as np
-import joblib
 from fairseq import checkpoint_utils
-import pdb
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 f = open("%s/extract_f0_feature.log"%exp_dir, "a+")
@@ -48,7 +51,8 @@ def readwave(wav_path, normalize=False):
 )
 model = models[0]
 model = model.to(device)
-model = model.half()
+if torch.cuda.is_available():
+    model = model.half()
 model.eval()
 
 todo=sorted(list(os.listdir(wavPath)))[i_part::n_part]
@@ -67,7 +71,7 @@ def readwave(wav_path, normalize=False):
                 feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
                 padding_mask = torch.BoolTensor(feats.shape).fill_(False)
                 inputs = {
-                    "source": feats.half().to(device),
+                    "source": feats.half().to(device) if torch.cuda.is_available() else feats.to(device),
                     "padding_mask": padding_mask.to(device),
                     "output_layer": 9,  # layer 9
                 }