Skip to content

Commit 297d92b

Browse files
authored
Some changes to audio-processing precision (#94)
* Some changes to audio-processing precision * Fix clipping problem in resampling: resampling in general sometimes causes signal clipping, not just librosa.resample * Fix error
1 parent c423f77 commit 297d92b

File tree

4 files changed

+32
-9
lines changed

4 files changed

+32
-9
lines changed

extract_f0_print.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ def __init__(self, samplerate=16000, hop_size=160):
3333
self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
3434

3535
def compute_f0(self, path, f0_method):
36-
x, sr = librosa.load(path, self.fs)
36+
# default resample type of librosa.resample is "soxr_hq".
37+
# Quality: soxr_vhq > soxr_hq
38+
x, sr = librosa.load(path, self.fs, res_type='soxr_vhq')
3739
p_len = x.shape[0] // self.hop
3840
assert sr == self.fs
3941
if f0_method == "pm":

my_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ def load_audio(file, sr):
1212
) # 防止小白拷路径头尾带了空格和"和回车
1313
out, _ = (
1414
ffmpeg.input(file, threads=0)
15-
.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
15+
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
1616
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
1717
)
1818
except Exception as e:
1919
raise RuntimeError(f"Failed to load audio: {e}")
2020

21-
return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
21+
return np.frombuffer(out, np.float32).flatten()

train/data_utils.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,10 @@ def get_audio(self, filename):
9898
sampling_rate, self.sampling_rate
9999
)
100100
)
101-
audio_norm = audio / self.max_wav_value
101+
audio_norm = audio
102+
# audio_norm = audio / self.max_wav_value
103+
# audio_norm = audio / np.abs(audio).max()
104+
102105
audio_norm = audio_norm.unsqueeze(0)
103106
spec_filename = filename.replace(".wav", ".spec.pt")
104107
if os.path.exists(spec_filename):
@@ -287,7 +290,10 @@ def get_audio(self, filename):
287290
sampling_rate, self.sampling_rate
288291
)
289292
)
290-
audio_norm = audio / self.max_wav_value
293+
audio_norm = audio
294+
# audio_norm = audio / self.max_wav_value
295+
# audio_norm = audio / np.abs(audio).max()
296+
291297
audio_norm = audio_norm.unsqueeze(0)
292298
spec_filename = filename.replace(".wav", ".spec.pt")
293299
if os.path.exists(spec_filename):

trainset_preprocess_pipeline_print.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,19 +59,34 @@ def norm_write(self, tmp_audio, idx0, idx1):
5959
wavfile.write(
6060
"%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1),
6161
self.sr,
62-
(tmp_audio * 32768).astype(np.int16),
62+
(tmp_audio * 1).astype(np.float32),
6363
)
64-
tmp_audio = librosa.resample(tmp_audio, orig_sr=self.sr, target_sr=16000)
64+
65+
# default resample type of librosa.resample is "soxr_hq".
66+
# Quality: soxr_vhq > soxr_hq
67+
tmp_audio = librosa.resample(tmp_audio, orig_sr=self.sr, target_sr=16000, res_type="soxr_vhq")
68+
tmp_audio = (tmp_audio / np.abs(tmp_audio).max() * (self.max * self.alpha)) + (
69+
1 - self.alpha
70+
) * tmp_audio
71+
wavfile.write(
72+
"%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1),
73+
self.sr,
74+
(tmp_audio * 1).astype(np.float32),
75+
)
76+
6577
wavfile.write(
6678
"%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1),
6779
16000,
68-
(tmp_audio * 32768).astype(np.int16),
80+
(tmp_audio * 1).astype(np.float32),
6981
)
7082

7183
def pipeline(self, path, idx0):
7284
try:
7385
audio = load_audio(path, self.sr)
74-
audio = signal.filtfilt(self.bh, self.ah, audio)
86+
# zero-phase digital filtering (filtfilt) causes pre-ringing noise...
87+
# audio = signal.filtfilt(self.bh, self.ah, audio)
88+
audio = signal.lfilter(self.bh, self.ah, audio)
89+
7590
idx1 = 0
7691
for audio in self.slicer.slice(audio):
7792
i = 0

0 commit comments

Comments
 (0)