Skip to content

Commit 64f994b

Browse files
haofanurusaifumiama
authored and committed
fix&feat(gui): 修复ASIO输出爆音, 增加不同种API音频设备支持
See RVC-Project/Retrieval-based-Voice-Conversion-WebUI#2591
1 parent 6223116 commit 64f994b

File tree

4 files changed

+274
-29
lines changed

4 files changed

+274
-29
lines changed

gui.py

Lines changed: 107 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,11 @@ def run(self):
7676
import json
7777
import multiprocessing
7878
import re
79+
import threading
7980
import time
8081
from multiprocessing import Queue, cpu_count
82+
from infer.lib.audio import AudioIoProcess
83+
from multiprocessing.shared_memory import SharedMemory
8184

8285
import librosa
8386
from infer.modules.gui import TorchGate
@@ -144,6 +147,15 @@ def __init__(self) -> None:
144147
self.input_devices_indices = None
145148
self.output_devices_indices = None
146149
self.stream = None
150+
self.in_mem = None
151+
self.out_mem = None
152+
self.in_buf = None
153+
self.out_buf = None
154+
self.in_ptr = None
155+
self.out_ptr = None
156+
self.play_ptr = None
157+
self.in_evt = None
158+
self.stop_evt = None
147159
self.update_devices()
148160
self.launcher()
149161

@@ -564,7 +576,7 @@ def event_handler(self):
564576
event, values = self.window.read()
565577
if event == sg.WINDOW_CLOSED:
566578
self.stop_stream()
567-
exit()
579+
# exit()
568580
if event == "reload_devices" or event == "sg_hostapi":
569581
self.gui_config.sg_hostapi = values["sg_hostapi"]
570582
self.update_devices(hostapi_name=values["sg_hostapi"])
@@ -639,7 +651,7 @@ def event_handler(self):
639651
json.dump(settings, j)
640652
if self.stream is not None:
641653
self.delay_time = (
642-
self.stream.latency[-1]
654+
self.stream.get_latency()
643655
+ values["block_time"]
644656
+ values["crossfade_length"]
645657
+ 0.01
@@ -667,7 +679,7 @@ def event_handler(self):
667679
self.rvc.set_index_rate(values["index_rate"])
668680
elif event == "rms_mix_rate":
669681
self.gui_config.rms_mix_rate = values["rms_mix_rate"]
670-
elif event in ["pm", "dio", "harvest", "crepe", "rmvpe", "fcpe"]:
682+
elif event in ["pm", "harvest", "crepe", "rmvpe", "fcpe"]:
671683
self.gui_config.f0method = event
672684
elif event == "I_noise_reduce":
673685
self.gui_config.I_noise_reduce = values["I_noise_reduce"]
@@ -867,36 +879,80 @@ def start_stream(self):
867879
"WASAPI" in self.gui_config.sg_hostapi
868880
and self.gui_config.sg_wasapi_exclusive
869881
):
870-
extra_settings = sd.WasapiSettings(exclusive=True)
882+
wasapi_exclusive = True
871883
else:
872-
extra_settings = None
873-
self.stream = sd.Stream(
874-
callback=self.audio_callback,
875-
blocksize=self.block_frame,
876-
samplerate=self.gui_config.samplerate,
877-
channels=self.gui_config.channels,
878-
dtype="float32",
879-
extra_settings=extra_settings,
884+
wasapi_exclusive = False
885+
self.stream = AudioIoProcess(
886+
input_device=sd.default.device[0],
887+
output_device=sd.default.device[1],
888+
input_audio_block_size = self.block_frame,
889+
sample_rate = self.gui_config.samplerate,
890+
channel_num=self.gui_config.channels,
891+
is_input_wasapi_exclusive=wasapi_exclusive,
892+
is_output_wasapi_exclusive=wasapi_exclusive,
893+
is_device_combined = True
894+
# TODO: Add control UI to allow devices with different type API & different WASAPI settings
880895
)
896+
self.in_mem = SharedMemory(name=self.stream.get_in_mem_name())
897+
self.out_mem = SharedMemory(name=self.stream.get_out_mem_name())
898+
self.in_buf = np.ndarray(
899+
self.stream.get_np_shape(),
900+
dtype=self.stream.get_np_dtype(),
901+
buffer=self.in_mem.buf,
902+
order='C'
903+
)
904+
self.out_buf = np.ndarray(
905+
self.stream.get_np_shape(),
906+
dtype=self.stream.get_np_dtype(),
907+
buffer=self.out_mem.buf,
908+
order='C'
909+
)
910+
self.in_ptr, \
911+
self.out_ptr, \
912+
self.play_ptr, \
913+
self.in_evt, \
914+
self.stop_evt = self.stream.get_ptrs_and_events()
915+
881916
self.stream.start()
882917

918+
def audio_loop():
919+
while flag_vc:
920+
self.audio_infer(self.block_frame << 1)
921+
922+
threading.Thread(
923+
target=audio_loop,
924+
daemon=True
925+
).start()
926+
883927
def stop_stream(self):
884928
global flag_vc
885929
if flag_vc:
886930
flag_vc = False
887931
if self.stream is not None:
888-
self.stream.abort()
889-
self.stream.close()
932+
print("Exiting")
933+
self.stop_evt.set()
934+
self.in_mem.close()
935+
self.out_mem.close()
936+
self.stream.join()
890937
self.stream = None
891938

892-
def audio_callback(
893-
self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
939+
def audio_infer(
940+
self, buf_size:int # 2 * self.block_frame
894941
):
895942
"""
896943
音频处理
897944
"""
898945
global flag_vc
946+
947+
self.in_evt.wait()
948+
rptr = self.in_ptr.value
949+
self.in_evt.clear()
950+
899951
start_time = time.perf_counter()
952+
953+
rend = rptr + self.block_frame
954+
indata = np.copy(self.in_buf[rptr:rend])
955+
900956
indata = librosa.to_mono(indata.T)
901957
if self.gui_config.threhold > -60:
902958
indata = np.append(self.rms_buffer, indata)
@@ -1039,13 +1095,47 @@ def audio_callback(
10391095
self.sola_buffer[:] = infer_wav[
10401096
self.block_frame : self.block_frame + self.sola_buffer_frame
10411097
]
1042-
outdata[:] = (
1098+
outdata = (
10431099
infer_wav[: self.block_frame]
10441100
.repeat(self.gui_config.channels, 1)
10451101
.t()
10461102
.cpu()
10471103
.numpy()
10481104
)
1105+
1106+
# 装填输出缓冲
1107+
start = self.out_ptr.value
1108+
play_pos = self.play_ptr.value
1109+
1110+
# 计算播放进度差(写指针距离播放指针的帧数)
1111+
delta = (start - play_pos + buf_size) % buf_size
1112+
1113+
if delta < self.block_frame:
1114+
# 装填赶不上播放,导致播放进度追上来了,
1115+
# 此时已产生无法挽回的破音,
1116+
# 只好直接卡着播放指针写入,保证接下来的尽快放出来
1117+
print("[W] Output underrun")
1118+
write_pos = play_pos
1119+
else:
1120+
# 否则按块对齐
1121+
write_pos = (start + self.block_frame) % buf_size
1122+
1123+
# 写入共享缓冲区
1124+
end = (write_pos + self.block_frame) % buf_size
1125+
if end > write_pos:
1126+
self.out_buf[write_pos:end] = outdata
1127+
else:
1128+
first = buf_size - write_pos
1129+
self.out_buf[write_pos:] = outdata[:first]
1130+
self.out_buf[:end] = outdata[first:]
1131+
1132+
# 更新写指针
1133+
self.out_ptr.value = write_pos
1134+
1135+
if self.in_evt.is_set():
1136+
print("[W] Input overrun")
1137+
self.in_evt.clear()
1138+
10491139
total_time = time.perf_counter() - start_time
10501140
if flag_vc:
10511141
self.window["infer_time"].update(int(total_time * 1000))

infer/lib/audio.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
import os
55
import math
66
import wave
7+
import signal
8+
from multiprocessing import Process, Value, Event
9+
from multiprocessing.shared_memory import SharedMemory
710

811
import numpy as np
912
from numba import jit
@@ -195,3 +198,167 @@ def get_audio_properties(input_path: str) -> Tuple[int, int]:
195198
rate = audio_stream.base_rate
196199
container.close()
197200
return channels, rate
201+
202+
class AudioIoProcess(Process):
    """Child process that owns the sounddevice stream(s).

    Audio is exchanged with the parent through two shared-memory ring
    buffers (input and output). Each buffer holds two blocks of
    ``input_audio_block_size`` frames (double buffering) with ``channel_num``
    float32 samples per frame.

    Synchronisation primitives (see ``get_ptrs_and_events``):

    * ``in_ptr``   - set by this process: start frame of the input block that
      has just been completely recorded.
    * ``out_ptr``  - set by the parent: position of its latest write into the
      output buffer.
    * ``play_ptr`` - set by this process: frame up to which the output buffer
      has been played.
    * ``in_evt``   - set by this process each time a full input block is
      available.
    * ``stop_evt`` - set by the parent to stop audio I/O and end the process.
    """

    def __init__(self,
                 input_device,
                 output_device,
                 input_audio_block_size: int,
                 sample_rate: int,
                 channel_num: int = 2,
                 is_device_combined: bool = True,
                 is_input_wasapi_exclusive: bool = False,
                 is_output_wasapi_exclusive: bool = False
                 ):
        """Create the shared buffers and synchronisation objects.

        Args:
            input_device: Device assigned to ``sd.default.device[0]``.
            output_device: Device assigned to ``sd.default.device[1]``.
            input_audio_block_size: Frames per processing block.
            sample_rate: Stream sample rate in Hz.
            channel_num: Channels per frame.
            is_device_combined: Open one duplex ``sd.Stream`` when true,
                otherwise a separate ``sd.InputStream``/``sd.OutputStream``.
            is_input_wasapi_exclusive: Request WASAPI exclusive mode (input).
            is_output_wasapi_exclusive: Request WASAPI exclusive mode (output).
        """
        super().__init__()
        self.in_dev = input_device
        self.out_dev = output_device
        self.block_size: int = input_audio_block_size
        self.buf_size: int = self.block_size << 1  # double buffering
        self.sample_rate: int = sample_rate
        self.channels: int = channel_num
        self.is_device_combined: bool = is_device_combined
        self.is_input_wasapi_exclusive: bool = is_input_wasapi_exclusive
        self.is_output_wasapi_exclusive: bool = is_output_wasapi_exclusive

        self.__rec_ptr = 0  # recording position, used only by this process
        self.in_ptr = Value('i', 0)
        self.out_ptr = Value('i', 0)
        self.play_ptr = Value('i', 0)
        self.in_evt = Event()
        self.stop_evt = Event()

        # 0.0 until the child has opened the stream; ``ready_evt`` tells the
        # parent when the real value has been published.
        self.latency = Value('d', 0.0)
        self.ready_evt = Event()

        self.buf_shape: tuple = (self.buf_size, self.channels)
        self.buf_dtype: np.dtype = np.float32
        self.buf_nbytes: int = int(
            np.prod(self.buf_shape) * np.dtype(self.buf_dtype).itemsize)

        self.in_mem = SharedMemory(create=True, size=self.buf_nbytes)
        self.out_mem = SharedMemory(create=True, size=self.buf_nbytes)
        self.in_mem_name: str = self.in_mem.name
        self.out_mem_name: str = self.out_mem.name

        # numpy views over the shared memory; created inside run()
        self.in_buf = None
        self.out_buf = None

    def get_in_mem_name(self) -> str:
        """Return the name of the input shared-memory segment."""
        return self.in_mem_name

    def get_out_mem_name(self) -> str:
        """Return the name of the output shared-memory segment."""
        return self.out_mem_name

    def get_np_shape(self) -> tuple:
        """Return the ``(frames, channels)`` shape of both ring buffers."""
        return self.buf_shape

    def get_np_dtype(self) -> np.dtype:
        """Return the sample dtype of both ring buffers."""
        return self.buf_dtype

    def get_ptrs_and_events(self):
        """Return ``(in_ptr, out_ptr, play_ptr, in_evt, stop_evt)``."""
        return (
            self.in_ptr,
            self.out_ptr,
            self.play_ptr,
            self.in_evt,
            self.stop_evt,
        )

    def get_latency(self, timeout: float = 5.0) -> float:
        """Return the stream latency in seconds as reported by sounddevice.

        Once the process has been started this waits up to ``timeout``
        seconds for the child to publish the value, so callers that ask
        right after ``start()`` do not read the initial placeholder.
        Returns 0.0 if nothing was published in time.
        """
        if self.pid is not None:  # start() has been called
            self.ready_evt.wait(timeout)
        return self.latency.value

    def join(self, timeout=None):
        """Join the child, then close the creator-side shared-memory handles.

        The handles are closed only once the child has really exited, so a
        timed-out join leaves them untouched.
        """
        super().join(timeout)
        if self.exitcode is not None:
            self.in_mem.close()
            self.out_mem.close()

    def _publish_latency(self, seconds: float) -> None:
        """Store the measured latency and wake up ``get_latency`` callers."""
        self.latency.value = seconds
        self.ready_evt.set()

    def _output_callback(self, outdata, frames, time_info, status):
        """Copy the next ``frames`` frames of the output ring to the device.

        Assumes ``frames <= buf_size`` (at most one wrap-around per call).
        """
        play_ptr = self.play_ptr.value
        end_ptr = play_ptr + frames

        if end_ptr <= self.buf_size:
            outdata[:] = self.out_buf[play_ptr:end_ptr]
        else:  # wrap around the end of the ring
            first = self.buf_size - play_ptr
            second = end_ptr - self.buf_size
            outdata[:first] = self.out_buf[play_ptr:]
            outdata[first:] = self.out_buf[:second]

        self.play_ptr.value = end_ptr % self.buf_size

    def _input_callback(self, indata, frames, time_info, status):
        """Append recorded frames to the input ring and signal full blocks.

        Assumes ``frames <= buf_size`` (at most one wrap-around per call).
        """
        end_ptr = self.__rec_ptr + frames
        if end_ptr <= self.buf_size:  # contiguous copy
            self.in_buf[self.__rec_ptr:end_ptr] = indata
        else:  # wrap around the end of the ring
            first = self.buf_size - self.__rec_ptr
            second = end_ptr - self.buf_size
            self.in_buf[self.__rec_ptr:] = indata[:first]
            self.in_buf[:second] = indata[first:]
        write_pos = self.__rec_ptr
        self.__rec_ptr = end_ptr % self.buf_size

        if write_pos < self.block_size and self.__rec_ptr >= self.block_size:
            # crossed the midpoint: the first half is complete
            self.in_ptr.value = 0
            self.in_evt.set()
        elif write_pos < self.buf_size and self.__rec_ptr < write_pos:
            # wrapped to the start: the second half is complete
            self.in_ptr.value = self.block_size
            self.in_evt.set()

    def _combined_callback(self, indata, outdata, frames, time_info, status):
        """Duplex callback: serve playback first, then store the input."""
        self._output_callback(outdata, frames, time_info, status)
        self._input_callback(indata, frames, time_info, status)

    def run(self):
        """Open the stream(s), run until ``stop_evt`` is set, then clean up."""
        import sounddevice as sd

        # Ctrl+C is ignored here; shutdown is requested through stop_evt.
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        def wasapi_settings(exclusive):
            # Build the settings object only when exclusive mode is requested.
            return sd.WasapiSettings(exclusive=True) if exclusive else None

        def scalar_latency(latency):
            # A duplex Stream reports (input, output); NOTE(review): per the
            # sounddevice docs one-directional streams report a single float.
            if isinstance(latency, (tuple, list)):
                return latency[-1]
            return latency

        in_mem = None
        out_mem = None
        try:
            in_mem = SharedMemory(name=self.in_mem_name)
            self.in_buf = np.ndarray(
                self.buf_shape, dtype=self.buf_dtype,
                buffer=in_mem.buf, order='C')
            self.in_buf.fill(0.0)

            out_mem = SharedMemory(name=self.out_mem_name)
            self.out_buf = np.ndarray(
                self.buf_shape, dtype=self.buf_dtype,
                buffer=out_mem.buf, order='C')
            self.out_buf.fill(0.0)

            sd.default.device = (self.in_dev, self.out_dev)

            if self.is_device_combined:
                with sd.Stream(
                    samplerate=self.sample_rate,
                    channels=self.channels,
                    dtype=self.buf_dtype,
                    latency='low',
                    extra_settings=wasapi_settings(
                        self.is_input_wasapi_exclusive
                        and self.is_output_wasapi_exclusive),
                    callback=self._combined_callback
                ) as s:
                    self._publish_latency(scalar_latency(s.latency))
                    self.stop_evt.wait()
            else:
                with sd.InputStream(
                    samplerate=self.sample_rate,
                    channels=self.channels,
                    dtype=self.buf_dtype,
                    latency='low',
                    extra_settings=wasapi_settings(
                        self.is_input_wasapi_exclusive),
                    callback=self._input_callback
                ) as si, sd.OutputStream(
                    samplerate=self.sample_rate,
                    channels=self.channels,
                    dtype=self.buf_dtype,
                    latency='low',
                    extra_settings=wasapi_settings(
                        self.is_output_wasapi_exclusive),
                    callback=self._output_callback
                ) as so:
                    self._publish_latency(
                        scalar_latency(si.latency)
                        + scalar_latency(so.latency))
                    self.stop_evt.wait()
            self.out_buf.fill(0.0)
        finally:
            # Release anyone blocked in get_latency(), even on failure.
            self.ready_evt.set()
            # Drop the numpy views before their backing memory is unmapped.
            self.in_buf = None
            self.out_buf = None
            # Release the shared memory whether or not the stream opened.
            for mem in (in_mem, out_mem):
                if mem is not None:
                    mem.close()
                    mem.unlink()

requirements/gui-dml.txt

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,6 @@
44
#torch 1.11.0 with cuda 11.3
55
#pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113
66
einops
7-
fairseq
8-
flask
9-
flask_cors
10-
gin
11-
gin_config
127
librosa
138
local_attention
149
matplotlib
@@ -20,7 +15,6 @@ scikit_learn
2015
scipy
2116
tensorboard
2217
tqdm
23-
wave
2418
FreeSimpleGUI
2519
sounddevice
2620
gradio

0 commit comments

Comments
 (0)