Skip to content

Commit 067731d

Browse files
768VecOnnxExport (#328)
* Delete export_onnx.py
* Delete export_onnx_old.py
* Delete models_onnx_moess.py
* Support 768 Vec
* Add files via upload
* Support 768 Vec Support 768 Vec
* Support 768 Vec Onnx Export Support 768 Vec Onnx Export
1 parent c3de24f commit 067731d

File tree

5 files changed

+144
-1046
lines changed

5 files changed

+144
-1046
lines changed

export_onnx.py

Lines changed: 34 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
2-
from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
1+
from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
32
import torch
43

54
if __name__ == "__main__":
@@ -21,65 +20,36 @@
2120

2221
device = "cpu" # 导出时设备(不影响使用模型)
2322

24-
if MoeVS:
25-
net_g = SynthesizerTrnMs256NSFsidM(
26-
*cpt["config"], is_half=False
27-
) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
28-
net_g.load_state_dict(cpt["weight"], strict=False)
29-
input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
30-
output_names = [
31-
"audio",
32-
]
33-
torch.onnx.export(
34-
net_g,
35-
(
36-
test_phone.to(device),
37-
test_phone_lengths.to(device),
38-
test_pitch.to(device),
39-
test_pitchf.to(device),
40-
test_ds.to(device),
41-
test_rnd.to(device),
42-
),
43-
ExportedPath,
44-
dynamic_axes={
45-
"phone": [1],
46-
"pitch": [1],
47-
"pitchf": [1],
48-
"rnd": [2],
49-
},
50-
do_constant_folding=False,
51-
opset_version=16,
52-
verbose=False,
53-
input_names=input_names,
54-
output_names=output_names,
55-
)
56-
else:
57-
net_g = SynthesizerTrnMs256NSFsidO(
58-
*cpt["config"], is_half=False
59-
) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
60-
net_g.load_state_dict(cpt["weight"], strict=False)
61-
input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
62-
output_names = [
63-
"audio",
64-
]
65-
torch.onnx.export(
66-
net_g,
67-
(
68-
test_phone.to(device),
69-
test_phone_lengths.to(device),
70-
test_pitch.to(device),
71-
test_pitchf.to(device),
72-
test_ds.to(device),
73-
),
74-
ExportedPath,
75-
dynamic_axes={
76-
"phone": [1],
77-
"pitch": [1],
78-
"pitchf": [1],
79-
},
80-
do_constant_folding=False,
81-
opset_version=16,
82-
verbose=False,
83-
input_names=input_names,
84-
output_names=output_names,
85-
)
23+
net_g = SynthesizerTrnMsNSFsidM(
24+
*cpt["config"], is_half=False
25+
) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
26+
net_g.load_state_dict(cpt["weight"], strict=False)
27+
input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
28+
output_names = [
29+
"audio",
30+
]
31+
# net_g.construct_spkmixmap(n_speaker) 多角色混合轨道导出
32+
torch.onnx.export(
33+
net_g,
34+
(
35+
test_phone.to(device),
36+
test_phone_lengths.to(device),
37+
test_pitch.to(device),
38+
test_pitchf.to(device),
39+
test_ds.to(device),
40+
test_rnd.to(device),
41+
),
42+
ExportedPath,
43+
dynamic_axes={
44+
"phone": [1],
45+
"pitch": [1],
46+
"pitchf": [1],
47+
"rnd": [2],
48+
},
49+
do_constant_folding=False,
50+
opset_version=16,
51+
verbose=False,
52+
input_names=input_names,
53+
output_names=output_names,
54+
)
55+

export_onnx_old.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

infer-web.py

Lines changed: 34 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,15 +1068,11 @@ def change_info_(ckpt_path):
10681068
return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
10691069

10701070

1071-
from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
1072-
from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
1073-
1074-
1071+
from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
10751072
def export_onnx(ModelPath, ExportedPath, MoeVS=True):
1076-
hidden_channels = 256 # hidden_channels,为768Vec做准备
10771073
cpt = torch.load(ModelPath, map_location="cpu")
10781074
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
1079-
print(*cpt["config"])
1075+
hidden_channels = cpt["config"][-2] # hidden_channels,为768Vec做准备
10801076

10811077
test_phone = torch.rand(1, 200, hidden_channels) # hidden unit
10821078
test_phone_lengths = torch.tensor([200]).long() # hidden unit 长度(貌似没啥用)
@@ -1087,68 +1083,38 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
10871083

10881084
device = "cpu" # 导出时设备(不影响使用模型)
10891085

1090-
if MoeVS:
1091-
net_g = SynthesizerTrnMs256NSFsidM(
1092-
*cpt["config"], is_half=False
1093-
) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
1094-
net_g.load_state_dict(cpt["weight"], strict=False)
1095-
input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
1096-
output_names = [
1097-
"audio",
1098-
]
1099-
torch.onnx.export(
1100-
net_g,
1101-
(
1102-
test_phone.to(device),
1103-
test_phone_lengths.to(device),
1104-
test_pitch.to(device),
1105-
test_pitchf.to(device),
1106-
test_ds.to(device),
1107-
test_rnd.to(device),
1108-
),
1109-
ExportedPath,
1110-
dynamic_axes={
1111-
"phone": [1],
1112-
"pitch": [1],
1113-
"pitchf": [1],
1114-
"rnd": [2],
1115-
},
1116-
do_constant_folding=False,
1117-
opset_version=16,
1118-
verbose=False,
1119-
input_names=input_names,
1120-
output_names=output_names,
1121-
)
1122-
else:
1123-
net_g = SynthesizerTrnMs256NSFsidO(
1124-
*cpt["config"], is_half=False
1125-
) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
1126-
net_g.load_state_dict(cpt["weight"], strict=False)
1127-
input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
1128-
output_names = [
1129-
"audio",
1130-
]
1131-
torch.onnx.export(
1132-
net_g,
1133-
(
1134-
test_phone.to(device),
1135-
test_phone_lengths.to(device),
1136-
test_pitch.to(device),
1137-
test_pitchf.to(device),
1138-
test_ds.to(device),
1139-
),
1140-
ExportedPath,
1141-
dynamic_axes={
1142-
"phone": [1],
1143-
"pitch": [1],
1144-
"pitchf": [1],
1145-
},
1146-
do_constant_folding=False,
1147-
opset_version=16,
1148-
verbose=False,
1149-
input_names=input_names,
1150-
output_names=output_names,
1151-
)
1086+
net_g = SynthesizerTrnMsNSFsidM(
1087+
*cpt["config"], is_half=False
1088+
) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
1089+
net_g.load_state_dict(cpt["weight"], strict=False)
1090+
input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
1091+
output_names = [
1092+
"audio",
1093+
]
1094+
# net_g.construct_spkmixmap(n_speaker) 多角色混合轨道导出
1095+
torch.onnx.export(
1096+
net_g,
1097+
(
1098+
test_phone.to(device),
1099+
test_phone_lengths.to(device),
1100+
test_pitch.to(device),
1101+
test_pitchf.to(device),
1102+
test_ds.to(device),
1103+
test_rnd.to(device),
1104+
),
1105+
ExportedPath,
1106+
dynamic_axes={
1107+
"phone": [1],
1108+
"pitch": [1],
1109+
"pitchf": [1],
1110+
"rnd": [2],
1111+
},
1112+
do_constant_folding=False,
1113+
opset_version=16,
1114+
verbose=False,
1115+
input_names=input_names,
1116+
output_names=output_names,
1117+
)
11521118
return "Finished"
11531119

11541120

0 commit comments

Comments
 (0)