replace onnx export with OpenUtau voicebank builder

MLo7Ghinsan · MLo7Ghinsan · commit 6c9b24708633 · 2023-09-06T01:27:30.000-05:00
diff --git a/DiffSinger_colab_notebook.ipynb b/DiffSinger_colab_notebook.ipynb
@@ -4,7 +4,6 @@
   "metadata": {
     "colab": {
       "provenance": [],
-      "gpuType": "A100",
       "collapsed_sections": [
         "MP5rRkbTpnG8",
         "Wv0gfI5feBSc",
@@ -13,7 +12,6 @@
         "FY40fGHEg9_i",
         "4sbU1aH5kGFE"
       ],
-      "machine_shape": "hm",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -22,8 +20,7 @@
     },
     "language_info": {
       "name": "python"
-    },
-    "accelerator": "GPU"
+    }
   },
   "cells": [
     {
@@ -685,21 +682,55 @@
     {
       "cell_type": "code",
       "source": [
-        "#@markdown # Convert to ONNX for OpenUtau\n",
+        "#@markdown # Build OpenUtau compatible voicebank\n",
         "%cd /content\n",
+        "from IPython.display import clear_output\n",
+        "clear_output()\n",
+        "import os\n",
+        "#@markdown <font size=\"-1.5\"> select this if you don't want to see the onnx converter's output\n",
+        "no_output = True # @param {type:\"boolean\"}\n",
         "\n",
-        "#@markdown <font size=\"-1.5\"> the type of the model you want to convert\n",
-        "model_type = \"acoustic\" # @param [\"acoustic\", \"variance\"]\n",
+        "#@markdown <font size=\"-1.5\"> path to your **ACOUSTIC CHECKPOINT**: automatically use latest checkpoint that is in the same folder\n",
+        "acoustic_checkpoint_path = \"\" #@param{type:\"string\"}\n",
+        "acoustic_folder_name = os.path.basename(os.path.dirname(acoustic_checkpoint_path)) + \"_acoustic\"\n",
+        "acoustic_folder_path = os.path.dirname(acoustic_checkpoint_path)\n",
         "\n",
-        "#@markdown <font size=\"-1.5\"> path to your checkpoint's **FOLDER** (NOT the model itself) or path to your save_dir\n",
-        "checkpoints_path = \"\" #@param{type:\"string'}\n",
-        "folder_name = os.path.basename(checkpoints_path)\n",
+        "#@markdown <font size=\"-1.5\"> path to your **VARIANCE CHECKPOINT** (leave blank if you don't have any): automatically use latest checkpoint that is in the same folder\n",
+        "variance_checkpoint_path = \"\" #@param{type:\"string\"}\n",
+        "variance_folder_name = os.path.basename(os.path.dirname(variance_checkpoint_path)) + \"_variance\"\n",
+        "variance_folder_path = os.path.dirname(variance_checkpoint_path)\n",
         "\n",
-        "#@markdown <font size=\"-1.5\"> path to where you want to save your converted model and it's file\n",
+        "#@markdown <font size=\"-1.5\"> path to your word to phoneme dict (leave blank to use default Japanese dict)\n",
+        "dictionary_path = \"\" #@param{type:\"string\"}\n",
+        "\n",
+        "#@markdown <font size=\"-1.5\"> path to where you want to save your OpenUtau bank\n",
         "exp_folder = \"\" #@param{type:\"string\"}\n",
         "\n",
+        "acoustic_onnx_exp = exp_folder + \"/onnx/acoustic\"\n",
+        "variance_onnx_exp = exp_folder + \"/onnx/variance\"\n",
+        "\n",
+        "print(\"\\n\")\n",
+        "print(\"getting base files...\")\n",
+        "\n",
+        "!mkdir -p /content/OU_compatible_files/enunux_base\n",
+        "!mkdir -p /content/OU_compatible_files/variance_base\n",
+        "!wget https://github.com/MLo7Ghinsan/DiffSinger_colab_notebook_MLo7/releases/download/OU_files/enunux_base.zip >/dev/null 2>&1\n",
+        "!wget https://github.com/MLo7Ghinsan/DiffSinger_colab_notebook_MLo7/releases/download/OU_files/variance_base.zip >/dev/null 2>&1\n",
+        "!wget https://github.com/MLo7Ghinsan/DiffSinger_colab_notebook_MLo7/releases/download/OU_files/jpn_dict.txt >/dev/null 2>&1\n",
+        "!unzip -q /content/enunux_base.zip -d /content/OU_compatible_files/enunux_base\n",
+        "!unzip -q /content/variance_base.zip -d /content/OU_compatible_files/variance_base\n",
+        "!rm /content/enunux_base.zip\n",
+        "!rm /content/variance_base.zip\n",
+        "\n",
+        "!mkdir -p /content/DiffSinger/checkpoints/{acoustic_folder_name} # cp needs an existing folder, without it every file below is written over one file named after the folder\n",
+        "!cp -r {acoustic_checkpoint_path} {acoustic_folder_path}/config.yaml /content/DiffSinger/checkpoints/{acoustic_folder_name}\n",
+        "!cp -r {acoustic_folder_path}/dictionary.txt /content/DiffSinger/checkpoints/{acoustic_folder_name} # i dont think this is needed but its only one file oh well\n",
+        "!cp -r {acoustic_folder_path}/spk_map.json /content/DiffSinger/checkpoints/{acoustic_folder_name}\n",
+        "\n",
+        "print(\"\\n\")\n",
+        "print(\"converting acoustic to onnx...\")\n",
         "search_text = \"        args_work_dir = os.path.join(\"\n",
-        "replacement = f\"        args_work_dir = '{checkpoints_path}'\"\n",
+        "replacement = f\"        args_work_dir = '{acoustic_folder_path}'\"\n",
         "with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n",
         "    lines = file.readlines()\n",
         "for i, line in enumerate(lines):\n",
@@ -710,7 +741,7 @@
         "        file.writelines(lines)\n",
         "#incase if anyone wanna change it lmao\n",
         "search_text_alt = \"        args_work_dir = '\"\n",
-        "replacement_alt = f\"        args_work_dir = '{checkpoints_path}'\"\n",
+        "replacement_alt = f\"        args_work_dir = '{acoustic_folder_path}'\"\n",
         "with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n",
         "    lines = file.readlines()\n",
         "for i, line in enumerate(lines):\n",
@@ -720,11 +751,162 @@
         "with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n",
         "        file.writelines(lines)\n",
         "\n",
-        "!cp {checkpoints_path} -r /content/DiffSinger/checkpoints\n",
-        "if no_warn:\n",
-        "    !python /content/DiffSinger/scripts/export.py {model_type} --exp {folder_name} --out {exp_folder} 2> /dev/null\n",
+        "# run the acoustic export: the renaming and packaging steps below read their files from {exp_folder}/onnx/acoustic\n",
+        "if no_output:\n",
+        "    !python /content/DiffSinger/scripts/export.py acoustic --exp {acoustic_folder_name} --out {exp_folder}/onnx/acoustic >/dev/null 2>&1\n",
+        "else:\n",
+        "    !python /content/DiffSinger/scripts/export.py acoustic --exp {acoustic_folder_name} --out {exp_folder}/onnx/acoustic\n",
+        "\n",
+        "if not variance_checkpoint_path:\n",
+        "    print(\"\\n\")\n",
+        "    print(\"variance checkpoint path not specified, using enunux instead...\")\n",
+        "else:\n",
+        "    print(\"\\n\")\n",
+        "    print(\"converting variance to onnx...\")\n",
+        "    !mkdir -p /content/DiffSinger/checkpoints/{variance_folder_name} # cp needs an existing folder, without it every file below is written over one file named after the folder\n",
+        "    !cp -r {variance_checkpoint_path} {variance_folder_path}/config.yaml /content/DiffSinger/checkpoints/{variance_folder_name}\n",
+        "    !cp -r {variance_folder_path}/dictionary.txt /content/DiffSinger/checkpoints/{variance_folder_name} # i dont think this is needed but its only one file oh well\n",
+        "    !cp -r {variance_folder_path}/spk_map.json /content/DiffSinger/checkpoints/{variance_folder_name}\n",
+        "    search_text = \"        args_work_dir = os.path.join(\"\n",
+        "    replacement = f\"        args_work_dir = '{variance_folder_path}'\"\n",
+        "    with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n",
+        "        lines = file.readlines()\n",
+        "    for i, line in enumerate(lines):\n",
+        "        if search_text in line:\n",
+        "            lines[i] = replacement + \"\\n\"\n",
+        "            break\n",
+        "    with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n",
+        "            file.writelines(lines)\n",
+        "    # fallback for reruns: if hparams.py was already patched, args_work_dir is a quoted literal, so match and overwrite that line instead\n",
+        "    search_text_alt = \"        args_work_dir = '\"\n",
+        "    replacement_alt = f\"        args_work_dir = '{variance_folder_path}'\"\n",
+        "    with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n",
+        "        lines = file.readlines()\n",
+        "    for i, line in enumerate(lines):\n",
+        "        if search_text_alt in line:\n",
+        "            lines[i] = replacement_alt + \"\\n\"\n",
+        "            break\n",
+        "    with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n",
+        "            file.writelines(lines)\n",
+        "    if no_output:\n",
+        "        !python /content/DiffSinger/scripts/export.py variance --exp {variance_folder_name} --out {exp_folder}/onnx/variance >/dev/null 2>&1\n",
+        "    else:\n",
+        "        !python /content/DiffSinger/scripts/export.py variance --exp {variance_folder_name} --out {exp_folder}/onnx/variance\n",
+        "\n",
+        "if not variance_checkpoint_path:\n",
+        "    folder_paths = [acoustic_onnx_exp]\n",
+        "else:\n",
+        "    folder_paths = [acoustic_onnx_exp, variance_onnx_exp]\n",
+        "\n",
+        "# rename any exported file whose name contains one of these names to the bare name, so the packaging steps below can use fixed filenames\n",
+        "patterns = {\"acoustic.onnx\": \"acoustic.onnx\", \"dur.onnx\": \"dur.onnx\", \"linguistic.onnx\": \"linguistic.onnx\", \"pitch.onnx\": \"pitch.onnx\", \"variance.onnx\": \"variance.onnx\", \"phonemes.txt\": \"phonemes.txt\"}\n",
+        "\n",
+        "\n",
+        "for folder_path in folder_paths:\n",
+        "    for filename in os.listdir(folder_path):\n",
+        "        for pattern, new_name in patterns.items():\n",
+        "            if pattern in filename:\n",
+        "                old_path = os.path.join(folder_path, filename)\n",
+        "                new_path = os.path.join(folder_path, new_name)\n",
+        "                if os.path.exists(old_path):\n",
+        "                    os.rename(old_path, new_path)\n",
+        "\n",
+        "print(\"\\n\")\n",
+        "print(\"writing dsdict.yaml...\")\n",
+        "\n",
+        "if not dictionary_path:\n",
+        "    dict_path = \"/content/jpn_dict.txt\"\n",
+        "else:\n",
+        "    dict_path = dictionary_path\n",
+        "\n",
+        "# for symbols list\n",
+        "phoneme_dict_path = f\"{acoustic_folder_path}/dictionary.txt\"\n",
+        "\n",
+        "dsdict = \"dsdict.yaml\"\n",
+        "\n",
+        "def parse_phonemes(phonemes_str):\n",
+        "    return phonemes_str.split()\n",
+        "\n",
+        "entries = []\n",
+        "vowel_types = {\"a\", \"i\", \"u\", \"e\", \"o\", \"N\", \"M\", \"NG\", \"cl\", \"vf\"}\n",
+        "vowel_data = []\n",
+        "stop_data = []\n",
+        "\n",
+        "# Read the word to phoneme dictionary: one \"word<TAB>space separated phonemes\" entry per line\n",
+        "with open(dict_path, \"r\") as f:\n",
+        "    for line in f:\n",
+        "        line = line.strip()\n",
+        "        if not line:\n",
+        "            continue  # a blank line (e.g. at the end of the file) would crash the unpack below\n",
+        "        # split on the first tab only so a stray tab in the phoneme column does not break the unpack\n",
+        "        word, phonemes_str = line.split(\"\\t\", 1)\n",
+        "        entries.append({\"grapheme\": word, \"phonemes\": parse_phonemes(phonemes_str)})\n",
+        "\n",
+        "with open(phoneme_dict_path, \"r\") as f:\n",
+        "    for line in f:\n",
+        "        phoneme, _ = line.strip().split(\"\\t\")\n",
+        "        phoneme_type = \"vowel\" if phoneme[0] in vowel_types else \"stop\"\n",
+        "        entry = {\"symbol\": phoneme, \"type\": phoneme_type}\n",
+        "        if phoneme_type == \"vowel\":\n",
+        "            vowel_data.append(entry)\n",
+        "        else:\n",
+        "            stop_data.append(entry)\n",
+        "\n",
+        "vowel_data.sort(key=lambda x: x[\"symbol\"])\n",
+        "stop_data.sort(key=lambda x: x[\"symbol\"])\n",
+        "\n",
+        "dsdict_path = os.path.join(\"/content/OU_compatible_files\", dsdict)\n",
+        "with open(dsdict_path, \"w\") as f:\n",
+        "    f.write(\"entries:\\n\")\n",
+        "    for entry in entries:\n",
+        "        f.write(f\"- grapheme: {entry['grapheme']}\\n\")\n",
+        "        f.write(\"  phonemes:\\n\")\n",
+        "        for phoneme in entry[\"phonemes\"]:\n",
+        "            f.write(f\"  - {phoneme}\\n\")\n",
+        "\n",
+        "    f.write(\"\\nsymbols:\\n\")\n",
+        "    for entry in vowel_data + stop_data:\n",
+        "        f.write(f\"- symbol: {entry['symbol']}\\n\")\n",
+        "        f.write(f\"  type: {entry['type']}\\n\")\n",
+        "\n",
+        "print(\"\\n\")\n",
+        "print(\"putting your vb together...\")\n",
+        "\n",
+        "if not variance_checkpoint_path:\n",
+        "    acoustic_1 = f\"{acoustic_onnx_exp}\" + \"/acoustic.onnx\"\n",
+        "    !rm /content/OU_compatible_files/enunux_base/acoustic.onnx\n",
+        "    !cp {acoustic_1} /content/OU_compatible_files/enunux_base >/dev/null 2>&1\n",
+        "    !rm /content/OU_compatible_files/enunux_base/phonemes.txt\n",
+        "    !cp {exp_folder}/onnx/acoustic/phonemes.txt /content/OU_compatible_files/enunux_base >/dev/null 2>&1\n",
+        "    !cp {dsdict_path} /content/OU_compatible_files/enunux_base >/dev/null 2>&1 #enunux doesnt need this but it doesnt hurt to include this file with it\n",
+        "    !mv /content/OU_compatible_files/enunux_base /content/OU_compatible_files/OU_voicebank\n",
+        "    !zip -r {exp_folder}/OU_compatible_vb.zip /content/OU_compatible_files/OU_voicebank/* >/dev/null 2>&1 # -r so the contents of any subfolder in the base are zipped too\n",
+        "\n",
         "else:\n",
-        "    !python /content/DiffSinger/scripts/export.py {model_type} --exp {folder_name} --out {exp_folder}"
+        "    acoustic_1 = f\"{acoustic_onnx_exp}\" + \"/acoustic.onnx\"\n",
+        "    variance_1 = f\"{variance_onnx_exp}\" + \"/variance.onnx\"\n",
+        "    variance_2 = f\"{variance_onnx_exp}\" + \"/pitch.onnx\"\n",
+        "    variance_3 = f\"{variance_onnx_exp}\" + \"/dur.onnx\"\n",
+        "    variance_4 = f\"{variance_onnx_exp}\" + \"/linguistic.onnx\"\n",
+        "    !rm /content/OU_compatible_files/variance_base/acoustic.onnx\n",
+        "    !cp {acoustic_1} /content/OU_compatible_files/variance_base >/dev/null 2>&1\n",
+        "    !rm /content/OU_compatible_files/variance_base/linguistic.onnx\n",
+        "    !cp {variance_4} /content/OU_compatible_files/variance_base >/dev/null 2>&1\n",
+        "    !rm /content/OU_compatible_files/variance_base/dsvariance/variance.onnx\n",
+        "    !cp {variance_1} /content/OU_compatible_files/variance_base/dsvariance/variance.onnx >/dev/null 2>&1\n",
+        "    !rm /content/OU_compatible_files/variance_base/dspitch/pitch.onnx\n",
+        "    !cp {variance_2} /content/OU_compatible_files/variance_base/dspitch/pitch.onnx >/dev/null 2>&1\n",
+        "    !rm /content/OU_compatible_files/variance_base/dsdur/dur.onnx\n",
+        "    !cp {variance_3} /content/OU_compatible_files/variance_base/dsdur/dur.onnx >/dev/null 2>&1\n",
+        "    !rm /content/OU_compatible_files/variance_base/phonemes.txt\n",
+        "    !cp {exp_folder}/onnx/acoustic/phonemes.txt /content/OU_compatible_files/variance_base\n",
+        "    !rm /content/OU_compatible_files/variance_base/dsdict.yaml\n",
+        "    !cp {dsdict_path} /content/OU_compatible_files/variance_base\n",
+        "    !mv /content/OU_compatible_files/variance_base /content/OU_compatible_files/OU_voicebank\n",
+        "    !zip -r {exp_folder}/OU_compatible_vb.zip /content/OU_compatible_files/OU_voicebank/* >/dev/null 2>&1 # -r is needed, otherwise the dsvariance dspitch dsdur folders end up empty in the zip\n",
+        "\n",
+        "print(\"\\n\")\n",
+        "print(\"Go extract and edit character.txt and character.yaml to your liking for OpenUtau <3\")\n"
       ],
       "metadata": {
         "id": "x33iZhZchEMW",
@@ -747,7 +929,7 @@
       "source": [
         "import os\n",
         "\n",
-        "#@title Generate enunux.yaml\n",
+        "#@title Generate enunux.yaml (not including grapheme)\n",
         "\n",
         "#@markdown <font size=\"-2.5\"> path to your dictionary.txt\n",
         "\n",
@@ -780,20 +962,7 @@
         "cellView": "form",
         "id": "LMHTaub-kMSw"
       },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "#@title Make OpenUtau compatible voicebank\n",
-        "#@markdown not working yet lmao COMING NEXT UPDATE THO I PROMISE"
-      ],
-      "metadata": {
-        "cellView": "form",
-        "id": "HxQLlcz7k-8n"
-      },
-      "execution_count": null,
+      "execution_count": 31,
       "outputs": []
     },
     {
@@ -808,7 +977,6 @@
         "todo list:\n",
         "- add support for premade/refined data\n",
         "- add multi-singer training\n",
-        "- add OpenUtau voicebank builder\n",
         "- add link to vocoder training notebook (yet to be ready) or add a vocoder training section\n",
         "\n",
         "If you want to add anything to this list then again, just ping or message me lmao"