|
4 | 4 | "metadata": { |
5 | 5 | "colab": { |
6 | 6 | "provenance": [], |
7 | | - "gpuType": "A100", |
8 | 7 | "collapsed_sections": [ |
9 | 8 | "MP5rRkbTpnG8", |
10 | 9 | "Wv0gfI5feBSc", |
|
13 | 12 | "FY40fGHEg9_i", |
14 | 13 | "4sbU1aH5kGFE" |
15 | 14 | ], |
16 | | - "machine_shape": "hm", |
17 | 15 | "include_colab_link": true |
18 | 16 | }, |
19 | 17 | "kernelspec": { |
|
22 | 20 | }, |
23 | 21 | "language_info": { |
24 | 22 | "name": "python" |
25 | | - }, |
26 | | - "accelerator": "GPU" |
| 23 | + } |
27 | 24 | }, |
28 | 25 | "cells": [ |
29 | 26 | { |
|
685 | 682 | { |
686 | 683 | "cell_type": "code", |
687 | 684 | "source": [ |
688 | | - "#@markdown # Convert to ONNX for OpenUtau\n", |
| 685 | + "#@markdown # Build OpenUtau compatible voicebank\n", |
689 | 686 | "%cd /content\n", |
| 687 | + "from IPython.display import clear_output\n", |
| 688 | + "clear_output()\n", |
| 689 | + "import os\n", |
| 690 | + "#@markdown <font size=\"-1.5\"> select this if you don't want to see the onnx converter's output\n", |
| 691 | + "no_output = True # @param {type:\"boolean\"}\n", |
690 | 692 | "\n", |
691 | | - "#@markdown <font size=\"-1.5\"> the type of the model you want to convert\n", |
692 | | - "model_type = \"acoustic\" # @param [\"acoustic\", \"variance\"]\n", |
| 693 | + "#@markdown <font size=\"-1.5\"> path to your **ACOUSTIC CHECKPOINT**: automatically use latest checkpoint that is in the same folder\n", |
| 694 | + "acoustic_checkpoint_path = \"\" #@param{type:\"string\"}\n", |
| 695 | + "acoustic_folder_name = os.path.basename(os.path.dirname(acoustic_checkpoint_path)) + \"_acoustic\"\n", |
| 696 | + "acoustic_folder_path = os.path.dirname(acoustic_checkpoint_path)\n", |
693 | 697 | "\n", |
694 | | - "#@markdown <font size=\"-1.5\"> path to your checkpoint's **FOLDER** (NOT the model itself) or path to your save_dir\n", |
695 | | - "checkpoints_path = \"\" #@param{type:\"string'}\n", |
696 | | - "folder_name = os.path.basename(checkpoints_path)\n", |
| 698 | + "#@markdown <font size=\"-1.5\"> path to your **VARIANCE CHECKPOINT** (leave blank if you don't have any): automatically use latest checkpoint that is in the same folder\n", |
| 699 | + "variance_checkpoint_path = \"\" #@param{type:\"string\"}\n", |
| 700 | + "variance_folder_name = os.path.basename(os.path.dirname(variance_checkpoint_path)) + \"_variance\"\n", |
| 701 | + "variance_folder_path = os.path.dirname(variance_checkpoint_path)\n", |
697 | 702 | "\n", |
698 | | - "#@markdown <font size=\"-1.5\"> path to where you want to save your converted model and it's file\n", |
| 703 | + "#@markdown <font size=\"-1.5\"> path to your word to phoneme dict (leave blank to use default Japanese dict)\n", |
| 704 | + "dictionary_path = \"\" #@param{type:\"string\"}\n", |
| 705 | + "\n", |
| 706 | + "#@markdown <font size=\"-1.5\"> path to where you want to save your OpenUtau bank\n", |
699 | 707 | "exp_folder = \"\" #@param{type:\"string\"}\n", |
700 | 708 | "\n", |
| 709 | + "acoustic_onnx_exp = exp_folder + \"/onnx/acoustic\"\n", |
| 710 | + "variance_onnx_exp = exp_folder + \"/onnx/variance\"\n", |
| 711 | + "\n", |
| 712 | + "print(\"\\n\")\n", |
| 713 | + "print(\"getting base files...\")\n", |
| 714 | + "\n", |
| 715 | + "!mkdir -p /content/OU_compatible_files/enunux_base\n", |
| 716 | + "!mkdir -p /content/OU_compatible_files/variance_base\n", |
| 717 | + "!wget https://github.com/MLo7Ghinsan/DiffSinger_colab_notebook_MLo7/releases/download/OU_files/enunux_base.zip >/dev/null 2>&1\n", |
| 718 | + "!wget https://github.com/MLo7Ghinsan/DiffSinger_colab_notebook_MLo7/releases/download/OU_files/variance_base.zip >/dev/null 2>&1\n", |
| 719 | + "!wget https://github.com/MLo7Ghinsan/DiffSinger_colab_notebook_MLo7/releases/download/OU_files/jpn_dict.txt >/dev/null 2>&1\n", |
| 720 | + "!unzip -q /content/enunux_base.zip -d /content/OU_compatible_files/enunux_base\n", |
| 721 | + "!unzip -q /content/variance_base.zip -d /content/OU_compatible_files/variance_base\n", |
| 722 | + "!rm /content/enunux_base.zip\n", |
| 723 | + "!rm /content/variance_base.zip\n", |
| 724 | + "\n", |
| 725 | + "!cp {acoustic_checkpoint_path} -r /content/DiffSinger/checkpoints/{acoustic_folder_name}\n", |
| 726 | + "!cp {acoustic_folder_path}/config.yaml -r /content/DiffSinger/checkpoints/{acoustic_folder_name}\n", |
| 727 | + "!cp {acoustic_folder_path}/dictionary.txt -r /content/DiffSinger/checkpoints/{acoustic_folder_name} # i dont think this is needed but its only one file oh well\n", |
| 728 | + "!cp {acoustic_folder_path}/spk_map.json -r /content/DiffSinger/checkpoints/{acoustic_folder_name}\n", |
| 729 | + "\n", |
| 730 | + "print(\"\\n\")\n", |
| 731 | + "print(\"converting acoustic to onnx...\")\n", |
701 | 732 | "search_text = \" args_work_dir = os.path.join(\"\n", |
702 | | - "replacement = f\" args_work_dir = '{checkpoints_path}'\"\n", |
| 733 | + "replacement = f\" args_work_dir = '{acoustic_folder_path}'\"\n", |
703 | 734 | "with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n", |
704 | 735 | " lines = file.readlines()\n", |
705 | 736 | "for i, line in enumerate(lines):\n", |
|
710 | 741 | " file.writelines(lines)\n", |
711 | 742 | "#in case the line was already changed to a literal path (e.g. by an earlier run of this cell), match that form too\n", |
712 | 743 | "search_text_alt = \" args_work_dir = '\"\n", |
713 | | - "replacement_alt = f\" args_work_dir = '{checkpoints_path}'\"\n", |
| 744 | + "replacement_alt = f\" args_work_dir = '{acoustic_folder_path}'\"\n", |
714 | 745 | "with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n", |
715 | 746 | " lines = file.readlines()\n", |
716 | 747 | "for i, line in enumerate(lines):\n", |
|
720 | 751 | "with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n", |
721 | 752 | " file.writelines(lines)\n", |
722 | 753 | "\n", |
723 | | - "!cp {checkpoints_path} -r /content/DiffSinger/checkpoints\n", |
724 | | - "if no_warn:\n", |
725 | | - " !python /content/DiffSinger/scripts/export.py {model_type} --exp {folder_name} --out {exp_folder} 2> /dev/null\n", |
| 754 | + "#if no_output:\n", |
| 755 | + "# !python /content/DiffSinger/scripts/export.py acoustic --exp {acoustic_folder_name} --out {exp_folder}/onnx/acoustic >/dev/null 2>&1\n", |
| 756 | + "#else:\n", |
| 757 | + "# !python /content/DiffSinger/scripts/export.py acoustic --exp {acoustic_folder_name} --out {exp_folder}/onnx/acoustic\n", |
| 758 | + "\n", |
| 759 | + "\n", |
| 760 | + "if not variance_checkpoint_path:\n", |
| 761 | + " print(\"\\n\")\n", |
| 762 | + " print(\"variance checkpoint path not specified, using enunux instead...\")\n", |
| 763 | + "else:\n", |
| 764 | + " print(\"\\n\")\n", |
| 765 | + " print(\"converting variance to onnx...\")\n", |
| 766 | + " !cp {variance_checkpoint_path} -r /content/DiffSinger/checkpoints/{variance_folder_name}\n", |
| 767 | + " !cp {variance_folder_path}/config.yaml -r /content/DiffSinger/checkpoints/{variance_folder_name}\n", |
| 768 | + " !cp {variance_folder_path}/dictionary.txt -r /content/DiffSinger/checkpoints/{variance_folder_name} # i dont think this is needed but its only one file oh well\n", |
| 769 | + " !cp {variance_folder_path}/spk_map.json -r /content/DiffSinger/checkpoints/{variance_folder_name}\n", |
| 770 | + " search_text = \" args_work_dir = os.path.join(\"\n", |
| 771 | + " replacement = f\" args_work_dir = '{variance_folder_path}'\"\n", |
| 772 | + " with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n", |
| 773 | + " lines = file.readlines()\n", |
| 774 | + " for i, line in enumerate(lines):\n", |
| 775 | + " if search_text in line:\n", |
| 776 | + " lines[i] = replacement + \"\\n\"\n", |
| 777 | + " break\n", |
| 778 | + " with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n", |
| 779 | + " file.writelines(lines)\n", |
| 780 | + " #in case the line was already changed to a literal path (e.g. by an earlier run of this cell), match that form too\n", |
| 781 | + " search_text_alt = \" args_work_dir = '\"\n", |
| 782 | + " replacement_alt = f\" args_work_dir = '{variance_folder_path}'\"\n", |
| 783 | + " with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n", |
| 784 | + " lines = file.readlines()\n", |
| 785 | + " for i, line in enumerate(lines):\n", |
| 786 | + " if search_text_alt in line:\n", |
| 787 | + " lines[i] = replacement_alt + \"\\n\"\n", |
| 788 | + " break\n", |
| 789 | + " with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n", |
| 790 | + " file.writelines(lines)\n", |
| 791 | + " if no_output:\n", |
| 792 | + " !python /content/DiffSinger/scripts/export.py variance --exp {variance_folder_name} --out {exp_folder}/onnx/variance >/dev/null 2>&1\n", |
| 793 | + " else:\n", |
| 794 | + " !python /content/DiffSinger/scripts/export.py variance --exp {variance_folder_name} --out {exp_folder}/onnx/variance\n", |
| 795 | + "\n", |
| 796 | + "if not variance_checkpoint_path:\n", |
| 797 | + " folder_paths = [acoustic_onnx_exp]\n", |
| 798 | + "else:\n", |
| 799 | + " folder_paths = [acoustic_onnx_exp, variance_onnx_exp]\n", |
| 800 | + "\n", |
| 801 | + "#renaming these so its gonna be easier\n", |
| 802 | + "patterns = {\"acoustic.onnx\": \"acoustic.onnx\", \"dur.onnx\": \"dur.onnx\", \"linguistic.onnx\": \"linguistic.onnx\", \"pitch.onnx\": \"pitch.onnx\", \"variance.onnx\": \"variance.onnx\", \"phonemes.txt\": \"phonemes.txt\"}\n", |
| 803 | + "\n", |
| 804 | + "\n", |
| 805 | + "for folder_path in folder_paths:\n", |
| 806 | + " for filename in os.listdir(folder_path):\n", |
| 807 | + " for pattern, new_name in patterns.items():\n", |
| 808 | + " if pattern in filename:\n", |
| 809 | + " old_path = os.path.join(folder_path, filename)\n", |
| 810 | + " new_path = os.path.join(folder_path, new_name)\n", |
| 811 | + " if os.path.exists(old_path):\n", |
| 812 | + " os.rename(old_path, new_path)\n", |
| 813 | + "\n", |
| 814 | + "print(\"\\n\")\n", |
| 815 | + "print(\"writing dsdict.yaml...\")\n", |
| 816 | + "\n", |
| 817 | + "if not dictionary_path:\n", |
| 818 | + " dict_path = \"/content/jpn_dict.txt\"\n", |
| 819 | + "else:\n", |
| 820 | + " dict_path = dictionary_path\n", |
| 821 | + "\n", |
| 822 | + "# for symbols list\n", |
| 823 | + "phoneme_dict_path = f\"{acoustic_folder_path}/dictionary.txt\"\n", |
| 824 | + "\n", |
| 825 | + "dsdict = \"dsdict.yaml\"\n", |
| 826 | + "\n", |
| 827 | + "def parse_phonemes(phonemes_str):\n", |
| 828 | + " return phonemes_str.split()\n", |
| 829 | + "\n", |
| 830 | + "entries = []\n", |
| 831 | + "vowel_types = {\"a\", \"i\", \"u\", \"e\", \"o\", \"N\", \"M\", \"NG\", \"cl\", \"vf\"}\n", |
| 832 | + "vowel_data = []\n", |
| 833 | + "stop_data = []\n", |
| 834 | + "\n", |
| 835 | + "# Process the specified dictionary\n", |
| 836 | + "with open(dict_path, \"r\") as f:\n", |
| 837 | + " for line in f:\n", |
| 838 | + " word, phonemes_str = line.strip().split(\"\\t\")\n", |
| 839 | + " phonemes = parse_phonemes(phonemes_str)\n", |
| 840 | + " if len(phonemes) == 1:\n", |
| 841 | + " entries.append({\"grapheme\": word, \"phonemes\": phonemes})\n", |
| 842 | + " else:\n", |
| 843 | + " entries.append({\"grapheme\": word, \"phonemes\": phonemes})\n", |
| 844 | + "\n", |
| 845 | + "with open(phoneme_dict_path, \"r\") as f:\n", |
| 846 | + " for line in f:\n", |
| 847 | + " phoneme, _ = line.strip().split(\"\\t\")\n", |
| 848 | + " phoneme_type = \"vowel\" if phoneme[0] in vowel_types else \"stop\"\n", |
| 849 | + " entry = {\"symbol\": phoneme, \"type\": phoneme_type}\n", |
| 850 | + " if phoneme_type == \"vowel\":\n", |
| 851 | + " vowel_data.append(entry)\n", |
| 852 | + " else:\n", |
| 853 | + " stop_data.append(entry)\n", |
| 854 | + "\n", |
| 855 | + "vowel_data.sort(key=lambda x: x[\"symbol\"])\n", |
| 856 | + "stop_data.sort(key=lambda x: x[\"symbol\"])\n", |
| 857 | + "\n", |
| 858 | + "dsdict_path = os.path.join(\"/content/OU_compatible_files\", dsdict)\n", |
| 859 | + "with open(dsdict_path, \"w\") as f:\n", |
| 860 | + " f.write(\"entries:\\n\")\n", |
| 861 | + " for entry in entries:\n", |
| 862 | + " f.write(f\"- grapheme: {entry['grapheme']}\\n\")\n", |
| 863 | + " f.write(\" phonemes:\\n\")\n", |
| 864 | + " for phoneme in entry[\"phonemes\"]:\n", |
| 865 | + " f.write(f\" - {phoneme}\\n\")\n", |
| 866 | + "\n", |
| 867 | + " f.write(\"\\nsymbols:\\n\")\n", |
| 868 | + " for entry in vowel_data + stop_data:\n", |
| 869 | + " f.write(f\"- symbol: {entry['symbol']}\\n\")\n", |
| 870 | + " f.write(f\" type: {entry['type']}\\n\")\n", |
| 871 | + "\n", |
| 872 | + "print(\"\\n\")\n", |
| 873 | + "print(\"putting your vb together...\")\n", |
| 874 | + "\n", |
| 875 | + "if not variance_checkpoint_path:\n", |
| 876 | + " acoustic_1 = f\"{acoustic_onnx_exp}\" + \"/acoustic.onnx\"\n", |
| 877 | + " !rm /content/OU_compatible_files/enunux_base/acoustic.onnx\n", |
| 878 | + " !cp {acoustic_1} /content/OU_compatible_files/enunux_base >/dev/null 2>&1\n", |
| 879 | + " !rm /content/OU_compatible_files/enunux_base/phonemes.txt\n", |
| 880 | + " !cp {exp_folder}/onnx/acoustic/phonemes.txt /content/OU_compatible_files/enunux_base >/dev/null 2>&1\n", |
| 881 | + " !cp {dsdict_path} /content/OU_compatible_files/enunux_base >/dev/null 2>&1 #enunux doesnt need this but it doesnt hurt to include this file with it\n", |
| 882 | + " !mv /content/OU_compatible_files/enunux_base /content/OU_compatible_files/OU_voicebank\n", |
| 883 | + " !zip {exp_folder}/OU_compatible_vb.zip /content/OU_compatible_files/OU_voicebank/* >/dev/null 2>&1\n", |
| 884 | + "\n", |
726 | 885 | "else:\n", |
727 | | - " !python /content/DiffSinger/scripts/export.py {model_type} --exp {folder_name} --out {exp_folder}" |
| 886 | + " acoustic_1 = f\"{acoustic_onnx_exp}\" + \"/acoustic.onnx\"\n", |
| 887 | + " variance_1 = f\"{variance_onnx_exp}\" + \"/variance.onnx\"\n", |
| 888 | + " variance_2 = f\"{variance_onnx_exp}\" + \"/pitch.onnx\"\n", |
| 889 | + " variance_3 = f\"{variance_onnx_exp}\" + \"/dur.onnx\"\n", |
| 890 | + " variance_4 = f\"{variance_onnx_exp}\" + \"/linguistic.onnx\"\n", |
| 891 | + " !rm /content/OU_compatible_files/variance_base/acoustic.onnx\n", |
| 892 | + " !cp {acoustic_1} /content/OU_compatible_files/variance_base >/dev/null 2>&1\n", |
| 893 | + " !rm /content/OU_compatible_files/variance_base/linguistic.onnx\n", |
| 894 | + " !cp {variance_4} /content/OU_compatible_files/variance_base >/dev/null 2>&1\n", |
| 895 | + " !rm /content/OU_compatible_files/variance_base/dsvariance/variance.onnx\n", |
| 896 | + " !cp {variance_1} /content/OU_compatible_files/variance_base/dsvariance/variance.onnx >/dev/null 2>&1\n", |
| 897 | + " !rm /content/OU_compatible_files/variance_base/dspitch/pitch.onnx\n", |
| 898 | + " !cp {variance_2} /content/OU_compatible_files/variance_base/dspitch/pitch.onnx >/dev/null 2>&1\n", |
| 899 | + " !rm /content/OU_compatible_files/variance_base/dsdur/dur.onnx\n", |
| 900 | + " !cp {variance_3} /content/OU_compatible_files/variance_base/dsdur/dur.onnx >/dev/null 2>&1\n", |
| 901 | + " !rm /content/OU_compatible_files/variance_base/phonemes.txt\n", |
| 902 | + " !cp {exp_folder}/onnx/acoustic/phonemes.txt /content/OU_compatible_files/variance_base\n", |
| 903 | + " !rm /content/OU_compatible_files/variance_base/dsdict.yaml\n", |
| 904 | + " !cp {dsdict_path} /content/OU_compatible_files/variance_base\n", |
| 905 | + " !mv /content/OU_compatible_files/variance_base /content/OU_compatible_files/OU_voicebank\n", |
| 906 | + " !zip {exp_folder}/OU_compatible_vb.zip /content/OU_compatible_files/OU_voicebank/* >/dev/null 2>&1\n", |
| 907 | + "\n", |
| 908 | + "print(\"\\n\")\n", |
| 909 | + "print(\"Go extract and edit character.txt and character.yaml to your liking for OpenUtau <3\")\n" |
728 | 910 | ], |
729 | 911 | "metadata": { |
730 | 912 | "id": "x33iZhZchEMW", |
|
747 | 929 | "source": [ |
748 | 930 | "import os\n", |
749 | 931 | "\n", |
750 | | - "#@title Generate enunux.yaml\n", |
| 932 | + "#@title Generate enunux.yaml (not including grapheme)\n", |
751 | 933 | "\n", |
752 | 934 | "#@markdown <font size=\"-2.5\"> path to your dictionary.txt\n", |
753 | 935 | "\n", |
|
780 | 962 | "cellView": "form", |
781 | 963 | "id": "LMHTaub-kMSw" |
782 | 964 | }, |
783 | | - "execution_count": null, |
784 | | - "outputs": [] |
785 | | - }, |
786 | | - { |
787 | | - "cell_type": "code", |
788 | | - "source": [ |
789 | | - "#@title Make OpenUtau compatible voicebank\n", |
790 | | - "#@markdown not working yet lmao COMING NEXT UPDATE THO I PROMISE" |
791 | | - ], |
792 | | - "metadata": { |
793 | | - "cellView": "form", |
794 | | - "id": "HxQLlcz7k-8n" |
795 | | - }, |
796 | | - "execution_count": null, |
| 965 | + "execution_count": 31, |
797 | 966 | "outputs": [] |
798 | 967 | }, |
799 | 968 | { |
|
808 | 977 | "todo list:\n", |
809 | 978 | "- add support for premade/refined data\n", |
810 | 979 | "- add multi-singer training\n", |
811 | | - "- add OpenUtau voicebank builder\n", |
812 | 980 | "- add link to vocoder training notebook (yet to be ready) or add a vocoder training section\n", |
813 | 981 | "\n", |
814 | 982 | "If you want to add anything to this list then again, just ping or message me lmao" |
|
0 commit comments