Skip to content
This repository was archived by the owner on Dec 28, 2025. It is now read-only.

Commit 6c9b247

Browse files
committed
replace onnx export with OpenUtau voicebank builder
1 parent 10c9cf8 commit 6c9b247

File tree

1 file changed

+201
-33
lines changed

1 file changed

+201
-33
lines changed

DiffSinger_colab_notebook.ipynb

Lines changed: 201 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
"metadata": {
55
"colab": {
66
"provenance": [],
7-
"gpuType": "A100",
87
"collapsed_sections": [
98
"MP5rRkbTpnG8",
109
"Wv0gfI5feBSc",
@@ -13,7 +12,6 @@
1312
"FY40fGHEg9_i",
1413
"4sbU1aH5kGFE"
1514
],
16-
"machine_shape": "hm",
1715
"include_colab_link": true
1816
},
1917
"kernelspec": {
@@ -22,8 +20,7 @@
2220
},
2321
"language_info": {
2422
"name": "python"
25-
},
26-
"accelerator": "GPU"
23+
}
2724
},
2825
"cells": [
2926
{
@@ -685,21 +682,55 @@
685682
{
686683
"cell_type": "code",
687684
"source": [
688-
"#@markdown # Convert to ONNX for OpenUtau\n",
685+
"#@markdown # Build OpenUtau compatible voicebank\n",
689686
"%cd /content\n",
687+
"from IPython.display import clear_output\n",
688+
"clear_output()\n",
689+
"import os\n",
690+
"#@markdown <font size=\"-1.5\"> select this if you don't want to see the onnx converter's output\n",
691+
"no_output = True # @param {type:\"boolean\"}\n",
690692
"\n",
691-
"#@markdown <font size=\"-1.5\"> the type of the model you want to convert\n",
692-
"model_type = \"acoustic\" # @param [\"acoustic\", \"variance\"]\n",
693+
"#@markdown <font size=\"-1.5\"> path to your **ACOUSTIC CHECKPOINT**: automatically use latest checkpoint that is in the same folder\n",
694+
"acoustic_checkpoint_path = \"\" #@param{type:\"string\"}\n",
695+
"acoustic_folder_name = os.path.basename(os.path.dirname(acoustic_checkpoint_path)) + \"_acoustic\"\n",
696+
"acoustic_folder_path = os.path.dirname(acoustic_checkpoint_path)\n",
693697
"\n",
694-
"#@markdown <font size=\"-1.5\"> path to your checkpoint's **FOLDER** (NOT the model itself) or path to your save_dir\n",
695-
"checkpoints_path = \"\" #@param{type:\"string'}\n",
696-
"folder_name = os.path.basename(checkpoints_path)\n",
698+
"#@markdown <font size=\"-1.5\"> path to your **VARIANCE CHECKPOINT** (leave blank if you don't have any): automatically use latest checkpoint that is in the same folder\n",
699+
"variance_checkpoint_path = \"\" #@param{type:\"string\"}\n",
700+
"variance_folder_name = os.path.basename(os.path.dirname(variance_checkpoint_path)) + \"_variance\"\n",
701+
"variance_folder_path = os.path.dirname(variance_checkpoint_path)\n",
697702
"\n",
698-
"#@markdown <font size=\"-1.5\"> path to where you want to save your converted model and it's file\n",
703+
"#@markdown <font size=\"-1.5\"> path to your word to phoneme dict (leave blank to use default Japanese dict)\n",
704+
"dictionary_path = \"\" #@param{type:\"string\"}\n",
705+
"\n",
706+
"#@markdown <font size=\"-1.5\"> path to where you want to save your OpenUtau bank\n",
699707
"exp_folder = \"\" #@param{type:\"string\"}\n",
700708
"\n",
709+
"acoustic_onnx_exp = exp_folder + \"/onnx/acoustic\"\n",
710+
"variance_onnx_exp = exp_folder + \"/onnx/variance\"\n",
711+
"\n",
712+
"print(\"\\n\")\n",
713+
"print(\"getting base files...\")\n",
714+
"\n",
715+
"!mkdir -p /content/OU_compatible_files/enunux_base\n",
716+
"!mkdir -p /content/OU_compatible_files/variance_base\n",
717+
"!wget https://github.com/MLo7Ghinsan/DiffSinger_colab_notebook_MLo7/releases/download/OU_files/enunux_base.zip >/dev/null 2>&1\n",
718+
"!wget https://github.com/MLo7Ghinsan/DiffSinger_colab_notebook_MLo7/releases/download/OU_files/variance_base.zip >/dev/null 2>&1\n",
719+
"!wget https://github.com/MLo7Ghinsan/DiffSinger_colab_notebook_MLo7/releases/download/OU_files/jpn_dict.txt >/dev/null 2>&1\n",
720+
"!unzip -q /content/enunux_base.zip -d /content/OU_compatible_files/enunux_base\n",
721+
"!unzip -q /content/variance_base.zip -d /content/OU_compatible_files/variance_base\n",
722+
"!rm /content/enunux_base.zip\n",
723+
"!rm /content/variance_base.zip\n",
724+
"\n",
725+
"!cp {acoustic_checkpoint_path} -r /content/DiffSinger/checkpoints/{acoustic_folder_name}\n",
726+
"!cp {acoustic_folder_path}/config.yaml -r /content/DiffSinger/checkpoints/{acoustic_folder_name}\n",
727+
"!cp {acoustic_folder_path}/dictionary.txt -r /content/DiffSinger/checkpoints/{acoustic_folder_name} # i dont think this is needed but its only one file oh well\n",
728+
"!cp {acoustic_folder_path}/spk_map.json -r /content/DiffSinger/checkpoints/{acoustic_folder_name}\n",
729+
"\n",
730+
"print(\"\\n\")\n",
731+
"print(\"converting acoustic to onnx...\")\n",
701732
"search_text = \" args_work_dir = os.path.join(\"\n",
702-
"replacement = f\" args_work_dir = '{checkpoints_path}'\"\n",
733+
"replacement = f\" args_work_dir = '{acoustic_folder_path}'\"\n",
703734
"with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n",
704735
" lines = file.readlines()\n",
705736
"for i, line in enumerate(lines):\n",
@@ -710,7 +741,7 @@
710741
" file.writelines(lines)\n",
711742
"#incase if anyone wanna change it lmao\n",
712743
"search_text_alt = \" args_work_dir = '\"\n",
713-
"replacement_alt = f\" args_work_dir = '{checkpoints_path}'\"\n",
744+
"replacement_alt = f\" args_work_dir = '{acoustic_folder_path}'\"\n",
714745
"with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n",
715746
" lines = file.readlines()\n",
716747
"for i, line in enumerate(lines):\n",
@@ -720,11 +751,162 @@
720751
"with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n",
721752
" file.writelines(lines)\n",
722753
"\n",
723-
"!cp {checkpoints_path} -r /content/DiffSinger/checkpoints\n",
724-
"if no_warn:\n",
725-
" !python /content/DiffSinger/scripts/export.py {model_type} --exp {folder_name} --out {exp_folder} 2> /dev/null\n",
754+
"#if no_output:\n",
755+
"# !python /content/DiffSinger/scripts/export.py acoustic --exp {acoustic_folder_name} --out {exp_folder}/onnx/acoustic >/dev/null 2>&1\n",
756+
"#else:\n",
757+
"# !python /content/DiffSinger/scripts/export.py acoustic --exp {acoustic_folder_name} --out {exp_folder}/onnx/acoustic\n",
758+
"\n",
759+
"\n",
760+
"if not variance_checkpoint_path:\n",
761+
" print(\"\\n\")\n",
762+
" print(\"variance checkpoint path not specified, using enunux instead...\")\n",
763+
"else:\n",
764+
" print(\"\\n\")\n",
765+
" print(\"converting variance to onnx...\")\n",
766+
" !cp {variance_checkpoint_path} -r /content/DiffSinger/checkpoints/{variance_folder_name}\n",
767+
" !cp {variance_folder_path}/config.yaml -r /content/DiffSinger/checkpoints/{variance_folder_name}\n",
768+
" !cp {variance_folder_path}/dictionary.txt -r /content/DiffSinger/checkpoints/{variance_folder_name} # i dont think this is needed but its only one file oh well\n",
769+
" !cp {variance_folder_path}/spk_map.json -r /content/DiffSinger/checkpoints/{variance_folder_name}\n",
770+
" search_text = \" args_work_dir = os.path.join(\"\n",
771+
" replacement = f\" args_work_dir = '{variance_folder_path}'\"\n",
772+
" with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n",
773+
" lines = file.readlines()\n",
774+
" for i, line in enumerate(lines):\n",
775+
" if search_text in line:\n",
776+
" lines[i] = replacement + \"\\n\"\n",
777+
" break\n",
778+
" with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n",
779+
" file.writelines(lines)\n",
780+
" # fallback: if hparams.py was already patched with a hard-coded path (e.g. by the acoustic step above), update that path instead\n",
781+
" search_text_alt = \" args_work_dir = '\"\n",
782+
" replacement_alt = f\" args_work_dir = '{variance_folder_path}'\"\n",
783+
" with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n",
784+
" lines = file.readlines()\n",
785+
" for i, line in enumerate(lines):\n",
786+
" if search_text_alt in line:\n",
787+
" lines[i] = replacement_alt + \"\\n\"\n",
788+
" break\n",
789+
" with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n",
790+
" file.writelines(lines)\n",
791+
" if no_output:\n",
792+
" !python /content/DiffSinger/scripts/export.py variance --exp {variance_folder_name} --out {exp_folder}/onnx/variance >/dev/null 2>&1\n",
793+
" else:\n",
794+
" !python /content/DiffSinger/scripts/export.py variance --exp {variance_folder_name} --out {exp_folder}/onnx/variance\n",
795+
"\n",
796+
"if not variance_checkpoint_path:\n",
797+
" folder_paths = [acoustic_onnx_exp]\n",
798+
"else:\n",
799+
" folder_paths = [acoustic_onnx_exp, variance_onnx_exp]\n",
800+
"\n",
801+
"#renaming these so its gonna be easier\n",
802+
"patterns = {\"acoustic.onnx\": \"acoustic.onnx\", \"dur.onnx\": \"dur.onnx\", \"linguistic.onnx\": \"linguistic.onnx\", \"pitch.onnx\": \"pitch.onnx\", \"variance.onnx\": \"variance.onnx\", \"phonemes.txt\": \"phonemes.txt\"}\n",
803+
"\n",
804+
"\n",
805+
"for folder_path in folder_paths:\n",
806+
" for filename in os.listdir(folder_path):\n",
807+
" for pattern, new_name in patterns.items():\n",
808+
" if pattern in filename:\n",
809+
" old_path = os.path.join(folder_path, filename)\n",
810+
" new_path = os.path.join(folder_path, new_name)\n",
811+
" if os.path.exists(old_path):\n",
812+
" os.rename(old_path, new_path)\n",
813+
"\n",
814+
"print(\"\\n\")\n",
815+
"print(\"writing dsdict.yaml...\")\n",
816+
"\n",
817+
"if not dictionary_path:\n",
818+
" dict_path = \"/content/jpn_dict.txt\"\n",
819+
"else:\n",
820+
" dict_path = dictionary_path\n",
821+
"\n",
822+
"# for symbols list\n",
823+
"phoneme_dict_path = f\"{acoustic_folder_path}/dictionary.txt\"\n",
824+
"\n",
825+
"dsdict = \"dsdict.yaml\"\n",
826+
"\n",
827+
"def parse_phonemes(phonemes_str):\n",
828+
" return phonemes_str.split()\n",
829+
"\n",
830+
"entries = []\n",
831+
"vowel_types = {\"a\", \"i\", \"u\", \"e\", \"o\", \"N\", \"M\", \"NG\", \"cl\", \"vf\"}\n",
832+
"vowel_data = []\n",
833+
"stop_data = []\n",
834+
"\n",
835+
"# Process the specified dictionary\n",
836+
"with open(dict_path, \"r\") as f:\n",
837+
" for line in f:\n",
838+
" word, phonemes_str = line.strip().split(\"\\t\")\n",
839+
" phonemes = parse_phonemes(phonemes_str)\n",
840+
" if len(phonemes) == 1:\n",
841+
" entries.append({\"grapheme\": word, \"phonemes\": phonemes})\n",
842+
" else:\n",
843+
" entries.append({\"grapheme\": word, \"phonemes\": phonemes})\n",
844+
"\n",
845+
"with open(phoneme_dict_path, \"r\") as f:\n",
846+
" for line in f:\n",
847+
" phoneme, _ = line.strip().split(\"\\t\")\n",
848+
" phoneme_type = \"vowel\" if phoneme[0] in vowel_types else \"stop\"\n",
849+
" entry = {\"symbol\": phoneme, \"type\": phoneme_type}\n",
850+
" if phoneme_type == \"vowel\":\n",
851+
" vowel_data.append(entry)\n",
852+
" else:\n",
853+
" stop_data.append(entry)\n",
854+
"\n",
855+
"vowel_data.sort(key=lambda x: x[\"symbol\"])\n",
856+
"stop_data.sort(key=lambda x: x[\"symbol\"])\n",
857+
"\n",
858+
"dsdict_path = os.path.join(\"/content/OU_compatible_files\", dsdict)\n",
859+
"with open(dsdict_path, \"w\") as f:\n",
860+
" f.write(\"entries:\\n\")\n",
861+
" for entry in entries:\n",
862+
" f.write(f\"- grapheme: {entry['grapheme']}\\n\")\n",
863+
" f.write(\" phonemes:\\n\")\n",
864+
" for phoneme in entry[\"phonemes\"]:\n",
865+
" f.write(f\" - {phoneme}\\n\")\n",
866+
"\n",
867+
" f.write(\"\\nsymbols:\\n\")\n",
868+
" for entry in vowel_data + stop_data:\n",
869+
" f.write(f\"- symbol: {entry['symbol']}\\n\")\n",
870+
" f.write(f\" type: {entry['type']}\\n\")\n",
871+
"\n",
872+
"print(\"\\n\")\n",
873+
"print(\"putting your vb together...\")\n",
874+
"\n",
875+
"if not variance_checkpoint_path:\n",
876+
" acoustic_1 = f\"{acoustic_onnx_exp}\" + \"/acoustic.onnx\"\n",
877+
" !rm /content/OU_compatible_files/enunux_base/acoustic.onnx\n",
878+
" !cp {acoustic_1} /content/OU_compatible_files/enunux_base >/dev/null 2>&1\n",
879+
" !rm /content/OU_compatible_files/enunux_base/phonemes.txt\n",
880+
" !cp {exp_folder}/onnx/acoustic/phonemes.txt /content/OU_compatible_files/enunux_base >/dev/null 2>&1\n",
881+
" !cp {dsdict_path} /content/OU_compatible_files/enunux_base >/dev/null 2>&1 #enunux doesnt need this but it doesnt hurt to include this file with it\n",
882+
" !mv /content/OU_compatible_files/enunux_base /content/OU_compatible_files/OU_voicebank\n",
883+
" !zip {exp_folder}/OU_compatible_vb.zip /content/OU_compatible_files/OU_voicebank/* >/dev/null 2>&1\n",
884+
"\n",
726885
"else:\n",
727-
" !python /content/DiffSinger/scripts/export.py {model_type} --exp {folder_name} --out {exp_folder}"
886+
" acoustic_1 = f\"{acoustic_onnx_exp}\" + \"/acoustic.onnx\"\n",
887+
" variance_1 = f\"{variance_onnx_exp}\" + \"/variance.onnx\"\n",
888+
" variance_2 = f\"{variance_onnx_exp}\" + \"/pitch.onnx\"\n",
889+
" variance_3 = f\"{variance_onnx_exp}\" + \"/dur.onnx\"\n",
890+
" variance_4 = f\"{variance_onnx_exp}\" + \"/linguistic.onnx\"\n",
891+
" !rm /content/OU_compatible_files/variance_base/acoustic.onnx\n",
892+
" !cp {acoustic_1} /content/OU_compatible_files/variance_base >/dev/null 2>&1\n",
893+
" !rm /content/OU_compatible_files/variance_base/linguistic.onnx\n",
894+
" !cp {variance_4} /content/OU_compatible_files/variance_base >/dev/null 2>&1\n",
895+
" !rm /content/OU_compatible_files/variance_base/dsvariance/variance.onnx\n",
896+
" !cp {variance_1} /content/OU_compatible_files/variance_base/dsvariance/variance.onnx >/dev/null 2>&1\n",
897+
" !rm /content/OU_compatible_files/variance_base/dspitch/pitch.onnx\n",
898+
" !cp {variance_2} /content/OU_compatible_files/variance_base/dspitch/pitch.onnx >/dev/null 2>&1\n",
899+
" !rm /content/OU_compatible_files/variance_base/dsdur/dur.onnx\n",
900+
" !cp {variance_3} /content/OU_compatible_files/variance_base/dsdur/dur.onnx >/dev/null 2>&1\n",
901+
" !rm /content/OU_compatible_files/variance_base/phonemes.txt\n",
902+
" !cp {exp_folder}/onnx/acoustic/phonemes.txt /content/OU_compatible_files/variance_base\n",
903+
" !rm /content/OU_compatible_files/variance_base/dsdict.yaml\n",
904+
" !cp {dsdict_path} /content/OU_compatible_files/variance_base\n",
905+
" !mv /content/OU_compatible_files/variance_base /content/OU_compatible_files/OU_voicebank\n",
906+
" !zip {exp_folder}/OU_compatible_vb.zip /content/OU_compatible_files/OU_voicebank/* >/dev/null 2>&1\n",
907+
"\n",
908+
"print(\"\\n\")\n",
909+
"print(\"Go extract and edit character.txt and character.yaml to your liking for OpenUtau <3\")\n"
728910
],
729911
"metadata": {
730912
"id": "x33iZhZchEMW",
@@ -747,7 +929,7 @@
747929
"source": [
748930
"import os\n",
749931
"\n",
750-
"#@title Generate enunux.yaml\n",
932+
"#@title Generate enunux.yaml (not including grapheme)\n",
751933
"\n",
752934
"#@markdown <font size=\"-2.5\"> path to your dictionary.txt\n",
753935
"\n",
@@ -780,20 +962,7 @@
780962
"cellView": "form",
781963
"id": "LMHTaub-kMSw"
782964
},
783-
"execution_count": null,
784-
"outputs": []
785-
},
786-
{
787-
"cell_type": "code",
788-
"source": [
789-
"#@title Make OpenUtau compatible voicebank\n",
790-
"#@markdown not working yet lmao COMING NEXT UPDATE THO I PROMISE"
791-
],
792-
"metadata": {
793-
"cellView": "form",
794-
"id": "HxQLlcz7k-8n"
795-
},
796-
"execution_count": null,
965+
"execution_count": 31,
797966
"outputs": []
798967
},
799968
{
@@ -808,7 +977,6 @@
808977
"todo list:\n",
809978
"- add support for premade/refined data\n",
810979
"- add multi-singer training\n",
811-
"- add OpenUtau voicebank builder\n",
812980
"- add link to vocoder training notebook (yet to be ready) or add a vocoder training section\n",
813981
"\n",
814982
"If you want to add anything to this list then again, just ping or message me lmao"

0 commit comments

Comments
 (0)