@@ -54,8 +54,8 @@ def __init__(
54
54
cpt = torch .load (pth_path , map_location = "cpu" )
55
55
tgt_sr = cpt ["config" ][- 1 ]
56
56
cpt ["config" ][- 3 ] = cpt ["weight" ]["emb_g.weight" ].shape [0 ] # n_spk
57
- if_f0 = cpt .get ("f0" , 1 )
58
- if if_f0 == 1 :
57
+ self . if_f0 = cpt .get ("f0" , 1 )
58
+ if self . if_f0 == 1 :
59
59
self .net_g = SynthesizerTrnMs256NSFsid (* cpt ["config" ], is_half = True )
60
60
else :
61
61
self .net_g = SynthesizerTrnMs256NSFsid_nono (* cpt ["config" ])
@@ -136,27 +136,37 @@ def infer(self, feats: torch.Tensor) -> np.ndarray:
136
136
137
137
feats = F .interpolate (feats .permute (0 , 2 , 1 ), scale_factor = 2 ).permute (0 , 2 , 1 )
138
138
torch .cuda .synchronize ()
139
- # p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存
140
- p_len = min (feats .shape [1 ], 12000 ) #
141
139
print (feats .shape )
142
- pitch , pitchf = self .get_f0 (audio , self .f0_up_key )
143
- p_len = min (feats .shape [1 ], 12000 , pitch .shape [0 ]) # 太大了爆显存
140
+ if (self .if_f0 == 1 ):
141
+ pitch , pitchf = self .get_f0 (audio , self .f0_up_key )
142
+ p_len = min (feats .shape [1 ], 13000 , pitch .shape [0 ]) # 太大了爆显存
143
+ else :
144
+ pitch , pitchf = None , None
145
+ p_len = min (feats .shape [1 ], 13000 ) # 太大了爆显存
144
146
torch .cuda .synchronize ()
145
147
# print(feats.shape,pitch.shape)
146
148
feats = feats [:, :p_len , :]
147
- pitch = pitch [:p_len ]
148
- pitchf = pitchf [:p_len ]
149
+ if (self .if_f0 == 1 ):
150
+ pitch = pitch [:p_len ]
151
+ pitchf = pitchf [:p_len ]
152
+ pitch = torch .LongTensor (pitch ).unsqueeze (0 ).to (device )
153
+ pitchf = torch .FloatTensor (pitchf ).unsqueeze (0 ).to (device )
149
154
p_len = torch .LongTensor ([p_len ]).to (device )
150
- pitch = torch .LongTensor (pitch ).unsqueeze (0 ).to (device )
151
- pitchf = torch .FloatTensor (pitchf ).unsqueeze (0 ).to (device )
152
155
ii = 0 # sid
153
156
sid = torch .LongTensor ([ii ]).to (device )
154
157
with torch .no_grad ():
155
- infered_audio = (
156
- self .net_g .infer (feats , p_len , pitch , pitchf , sid )[0 ][0 , 0 ]
157
- .data .cpu ()
158
- .float ()
159
- ) # nsf
158
+ if (self .if_f0 == 1 ):
159
+ infered_audio = (
160
+ self .net_g .infer (feats , p_len , pitch , pitchf , sid )[0 ][0 , 0 ]
161
+ .data .cpu ()
162
+ .float ()
163
+ )
164
+ else :
165
+ infered_audio = (
166
+ self .net_g .infer (feats , p_len , sid )[0 ][0 , 0 ]
167
+ .data .cpu ()
168
+ .float ()
169
+ )
160
170
torch .cuda .synchronize ()
161
171
return infered_audio
162
172
0 commit comments