This repository was archived by the owner on Apr 18, 2024. It is now read-only.

Commit 0e070a5

removed .npy dependency

1 parent 2ee8c18 commit 0e070a5

File tree

2 files changed (+54, -71 lines)


rvcgui.py

Lines changed: 16 additions & 27 deletions
@@ -43,7 +43,7 @@ def extract_model_from_zip(zip_path, output_dir):
 
     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
         for member in zip_ref.namelist():
-            if member.endswith('.pth') or member.endswith('.npy') or member.endswith('.index'):
+            if member.endswith('.pth') or member.endswith('.index'):
                 # Extract the file to the output folder
                 zip_ref.extract(member, output_folder)

@@ -95,7 +95,6 @@ def vc_single(
     f0_file,
     f0_method,
     file_index,
-    file_big_npy,
     index_rate,
     output_path=None,
 ): # spk_item, input_audio0, vc_transform0,f0_file,f0method0
@@ -117,10 +116,7 @@ def vc_single(
         .strip(" ")
         .replace("trained", "added")
     ) # prevent newcomers from mistyping this; replace it for them automatically
-    file_big_npy = (
-        file_big_npy.strip(" ").strip('"').strip(
-            "\n").strip('"').strip(" ")
-    )
+
     audio_opt = vc.pipeline(
         hubert_model,
         net_g,
@@ -130,7 +126,6 @@ def vc_single(
         f0_up_key,
         f0_method,
         file_index,
-        file_big_npy,
         index_rate,
         if_f0,
         f0_file=f0_file,
@@ -157,7 +152,6 @@ def vc_multi(
     f0_up_key,
     f0_method,
     file_index,
-    file_big_npy,
     index_rate,
 ):
     try:
@@ -185,7 +179,6 @@ def vc_multi(
                 None,
                 f0_method,
                 file_index,
-                file_big_npy,
                 index_rate,
             )
             if info == "Success":
@@ -335,20 +328,20 @@ def on_button_click():
     f0_file = f0_file_entry.get()
     f0_method = f0_method_entry.get()
     file_index = file_index_entry.get()
-    file_big_npy = file_big_npy_entry.get()
+    # file_big_npy = file_big_npy_entry.get()
     index_rate = round(index_rate_entry.get(),2)
     global output_file
     output_file = get_output_path(input_audio)
     print("sid: ", sid, "input_audio: ", input_audio, "f0_pitch: ", f0_pitch, "f0_file: ", f0_file, "f0_method: ", f0_method,
-          "file_index: ", file_index, "file_big_npy: ", file_big_npy, "index_rate: ", index_rate, "output_file: ", output_file)
+          "file_index: ", file_index, "file_big_npy: ", "index_rate: ", index_rate, "output_file: ", output_file)
     # Call the vc_single function with the user input values
     if model_loaded == True and os.path.isfile(input_audio):
         try:
             loading_progress.pack(padx=10, pady=10)
             loading_progress.start()

             result, audio_opt = vc_single(
-                0, input_audio, f0_pitch, None, f0_method, file_index, file_big_npy, index_rate, output_file)
+                0, input_audio, f0_pitch, None, f0_method, file_index, index_rate, output_file)
             # output_label.configure(text=result + "\n saved at" + output_file)
             print(os.path.join(output_file))
             if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
@@ -409,33 +402,29 @@ def start_processing():
 def selected_model(choice):
 
     file_index_entry.delete(0, ctk.END)
-    file_big_npy_entry.delete(0, ctk.END)
-    model_dir = os.path.normpath(os.path.join(models_dir, choice))
+
+    model_dir = os.path.join(models_dir, choice)
     pth_file = [f for f in os.listdir(model_dir) if os.path.isfile(
         os.path.join(model_dir, f)) and f.endswith(".pth")]
     if pth_file:
         global pth_file_path
         pth_file_path = os.path.join(model_dir, pth_file[0])
         npy_files = [f for f in os.listdir(model_dir) if os.path.isfile(
-            os.path.join(model_dir, f)) and (f.endswith(".npy") or f.endswith(".index"))]
+            os.path.join(model_dir, f)) and f.endswith(".index")]
         if npy_files:
             npy_files_dir = [os.path.join(model_dir, f) for f in npy_files]
-            if len(npy_files_dir) == 2:
-                index_file = [
-                    f for f in npy_files_dir if f.endswith(".index")][0]
-                npy_file = [f for f in npy_files_dir if f.endswith(".npy")][0]
+            if len(npy_files_dir) == 1:
+                index_file = npy_files_dir[0]
                 print(f".pth file directory: {pth_file_path}")
                 print(f".index file directory: {index_file}")
-                print(f".npy file directory: {npy_file}")

                 file_index_entry.insert(0, index_file)
-                file_big_npy_entry.insert(0, npy_file)

            else:
                 print(
-                    f"Incomplete set of .npy and .index files found in {model_dir}")
+                    f"Incomplete set of .index files found in {model_dir}")
         else:
-            print(f"No .npy or .index files found in {model_dir}")
+            print(f"No .index files found in {model_dir}")

     get_vc(pth_file_path, 0)
     global model_loaded
@@ -543,8 +532,8 @@ def update_config(selected):
 file_index_entry = ctk.CTkEntry(right_frame, width=250)
 
 # intiilizing big npy file widget
-file_big_npy_label = ctk.CTkLabel(right_frame, text=".npy File (Recommended)")
-file_big_npy_entry = ctk.CTkEntry(right_frame, width=250)
+
+
 
 # intiilizing index rate widget
 index_rate_entry = ctk.CTkSlider(
@@ -618,8 +607,8 @@ def update_config(selected):
 f0_file_entry.grid(padx=10, pady=10)
 file_index_label.grid(padx=10, pady=10)
 file_index_entry.grid(padx=10, pady=10)
-file_big_npy_label.grid(padx=10, pady=10)
-file_big_npy_entry.grid(padx=10, pady=10)
+
+
 index_rate_label.grid(padx=10, pady=10)
 index_rate_entry.grid(padx=10, pady=10)
 run_button.grid(padx=30, pady=30)
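
Note on the GUI change above: a model folder now only needs the .pth checkpoint plus, at most, one .index file; the separate .npy feature entry has been dropped from the interface. Below is a minimal sketch of that folder convention, assuming the same layout as selected_model() (the helper name and return shape are illustrative, not part of the commit):

```python
import os

def scan_model_dir(model_dir):
    """Return (pth_path, index_path_or_None) for a model folder that follows
    the post-commit layout: one .pth checkpoint and at most one .index file."""
    files = [f for f in os.listdir(model_dir)
             if os.path.isfile(os.path.join(model_dir, f))]
    pth = [f for f in files if f.endswith(".pth")]
    idx = [f for f in files if f.endswith(".index")]
    if not pth:
        raise FileNotFoundError(f"no .pth checkpoint in {model_dir}")
    index_path = os.path.join(model_dir, idx[0]) if len(idx) == 1 else None
    return os.path.join(model_dir, pth[0]), index_path
```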

vc_infer_pipeline.py

Lines changed: 38 additions & 44 deletions
@@ -1,15 +1,9 @@
-import numpy as np
-import parselmouth
-import torch
-import pdb
+import numpy as np, parselmouth, torch, pdb
 from time import time as ttime
 import torch.nn.functional as F
 from config import x_pad, x_query, x_center, x_max
 import scipy.signal as signal
-import pyworld
-import os
-import traceback
-import faiss
+import pyworld, os, traceback, faiss
 from scipy import signal
 
 bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
@@ -70,8 +64,8 @@ def get_f0(self, x, p_len, f0_up_key, f0_method, inp_f0=None):
                 replace_f0 = np.interp(
                     list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1]
                 )
-                shape = f0[x_pad * tf0: x_pad * tf0 + len(replace_f0)].shape[0]
-                f0[x_pad * tf0: x_pad * tf0 + len(replace_f0)] = replace_f0[:shape]
+                shape = f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)].shape[0]
+                f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)] = replace_f0[:shape]
         # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
         f0bak = f0.copy()
         f0_mel = 1127 * np.log(1 + f0 / 700)
@@ -105,8 +99,7 @@ def vc(
         feats = feats.mean(-1)
         assert feats.dim() == 1, feats.dim()
         feats = feats.view(1, -1)
-        padding_mask = torch.BoolTensor(
-            feats.shape).to(self.device).fill_(False)
+        padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
 
         inputs = {
             "source": feats.to(self.device),
@@ -126,17 +119,23 @@ def vc(
             npy = feats[0].cpu().numpy()
             if self.is_half:
                 npy = npy.astype("float32")
-            _, I = index.search(npy, 1)
-            npy = big_npy[I.squeeze()]
+
+            # _, I = index.search(npy, 1)
+            # npy = big_npy[I.squeeze()]
+
+            score, ix = index.search(npy, k=8)
+            weight = np.square(1 / score)
+            weight /= weight.sum(axis=1, keepdims=True)
+            npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
+
             if self.is_half:
                 npy = npy.astype("float16")
             feats = (
                 torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
                 + (1 - index_rate) * feats
             )
 
-        feats = F.interpolate(feats.permute(0, 2, 1),
-                              scale_factor=2).permute(0, 2, 1)
+        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
         t1 = ttime()
         p_len = audio0.shape[0] // self.window
         if feats.shape[1] < p_len:
@@ -148,8 +147,7 @@ def vc(
         with torch.no_grad():
             if pitch != None and pitchf != None:
                 audio1 = (
-                    (net_g.infer(feats, p_len, pitch,
-                                 pitchf, sid)[0][0, 0] * 32768)
+                    (net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] * 32768)
                     .data.cpu()
                     .float()
                     .numpy()
@@ -181,41 +179,41 @@ def pipeline(
         f0_up_key,
         f0_method,
         file_index,
-        file_big_npy,
+        # file_big_npy,
         index_rate,
         if_f0,
         f0_file=None,
     ):
         if (
-            file_big_npy != ""
-            and file_index != ""
-            and os.path.exists(file_big_npy) == True
+            file_index != ""
+            # and file_big_npy != ""
+            # and os.path.exists(file_big_npy) == True
             and os.path.exists(file_index) == True
             and index_rate != 0
         ):
             try:
                 index = faiss.read_index(file_index)
-                big_npy = np.load(file_big_npy)
+                # big_npy = np.load(file_big_npy)
+                big_npy = index.reconstruct_n(0, index.ntotal)
             except:
                 traceback.print_exc()
                 index = big_npy = None
         else:
             index = big_npy = None
         audio = signal.filtfilt(bh, ah, audio)
-        audio_pad = np.pad(
-            audio, (self.window // 2, self.window // 2), mode="reflect")
+        audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")
         opt_ts = []
         if audio_pad.shape[0] > self.t_max:
             audio_sum = np.zeros_like(audio)
             for i in range(self.window):
-                audio_sum += audio_pad[i: i - self.window]
+                audio_sum += audio_pad[i : i - self.window]
             for t in range(self.t_center, audio.shape[0], self.t_center):
                 opt_ts.append(
                     t
                     - self.t_query
                     + np.where(
-                        np.abs(audio_sum[t - self.t_query: t + self.t_query])
-                        == np.abs(audio_sum[t - self.t_query: t + self.t_query]).min()
+                        np.abs(audio_sum[t - self.t_query : t + self.t_query])
+                        == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min()
                     )[0][0]
                 )
         s = 0
@@ -238,13 +236,11 @@ def pipeline(
         sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
         pitch, pitchf = None, None
         if if_f0 == 1:
-            pitch, pitchf = self.get_f0(
-                audio_pad, p_len, f0_up_key, f0_method, inp_f0)
+            pitch, pitchf = self.get_f0(audio_pad, p_len, f0_up_key, f0_method, inp_f0)
             pitch = pitch[:p_len]
             pitchf = pitchf[:p_len]
             pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
-            pitchf = torch.tensor(
-                pitchf, device=self.device).unsqueeze(0).float()
+            pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
         t2 = ttime()
         times[1] += t2 - t1
         for t in opt_ts:
@@ -255,31 +251,29 @@ def pipeline(
                         model,
                         net_g,
                         sid,
-                        audio_pad[s: t + self.t_pad2 + self.window],
-                        pitch[:, s //
-                              self.window: (t + self.t_pad2) // self.window],
-                        pitchf[:, s //
-                               self.window: (t + self.t_pad2) // self.window],
+                        audio_pad[s : t + self.t_pad2 + self.window],
+                        pitch[:, s // self.window : (t + self.t_pad2) // self.window],
+                        pitchf[:, s // self.window : (t + self.t_pad2) // self.window],
                         times,
                         index,
                         big_npy,
                         index_rate,
-                    )[self.t_pad_tgt: -self.t_pad_tgt]
+                    )[self.t_pad_tgt : -self.t_pad_tgt]
                 )
             else:
                 audio_opt.append(
                     self.vc(
                         model,
                         net_g,
                         sid,
-                        audio_pad[s: t + self.t_pad2 + self.window],
+                        audio_pad[s : t + self.t_pad2 + self.window],
                         None,
                         None,
                         times,
                         index,
                         big_npy,
                         index_rate,
-                    )[self.t_pad_tgt: -self.t_pad_tgt]
+                    )[self.t_pad_tgt : -self.t_pad_tgt]
                 )
             s = t
         if if_f0 == 1:
@@ -289,13 +283,13 @@ def pipeline(
                     net_g,
                     sid,
                     audio_pad[t:],
-                    pitch[:, t // self.window:] if t is not None else pitch,
-                    pitchf[:, t // self.window:] if t is not None else pitchf,
+                    pitch[:, t // self.window :] if t is not None else pitch,
+                    pitchf[:, t // self.window :] if t is not None else pitchf,
                     times,
                     index,
                     big_npy,
                     index_rate,
-                )[self.t_pad_tgt: -self.t_pad_tgt]
+                )[self.t_pad_tgt : -self.t_pad_tgt]
             )
         else:
             audio_opt.append(
@@ -310,7 +304,7 @@ def pipeline(
                     index,
                     big_npy,
                     index_rate,
-                )[self.t_pad_tgt: -self.t_pad_tgt]
+                )[self.t_pad_tgt : -self.t_pad_tgt]
             )
         audio_opt = np.concatenate(audio_opt)
         del pitch, pitchf, sid
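
Note on the pipeline change above: the retrieval matrix that used to be loaded from the big .npy file is now rebuilt from the FAISS index itself with reconstruct_n, and the single nearest-neighbour lookup is replaced by an inverse-square-distance weighted blend over the k=8 nearest entries. A standalone sketch of that path, with names and shapes assumed from the diff (blend_features is illustrative; feats is expected to be a contiguous float32 array of shape (frames, dim)):

```python
import faiss
import numpy as np

def blend_features(feats, file_index, index_rate=1.0, k=8):
    """Blend frame-level features with their k nearest entries from a FAISS index."""
    index = faiss.read_index(file_index)
    # Recover the stored vectors from the index itself, so no separate
    # .npy feature archive has to ship with the model.
    big_npy = index.reconstruct_n(0, index.ntotal)

    score, ix = index.search(feats, k=k)         # (frames, k) distances and ids
    weight = np.square(1 / score)                # inverse-square-distance weights
    weight /= weight.sum(axis=1, keepdims=True)  # normalise weights per frame
    retrieved = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)

    # Mix retrieved features back with the originals, as the pipeline does via index_rate.
    return index_rate * retrieved + (1 - index_rate) * feats
```

Compared with the old index.search(npy, 1) lookup, the weighted blend smooths the retrieved features; note that, exactly as in the commit, an exact match with distance 0 would divide by zero when forming the weights.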
