|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import logging |
| 4 | +import subprocess |
| 5 | +from pathlib import Path |
| 6 | + |
logger = logging.getLogger(__name__)

# Cutoff frequency (Hz) of the high-pass filter applied in preprocess_audio.
HIGHPASS_CUTOFF_HZ = 80
# Passed as ``prop_decrease`` to noisereduce in preprocess_audio.
NOISE_PROP_DECREASE = 0.75
# Target loudness (LUFS) used for loudness normalization in preprocess_audio.
TARGET_LUFS = -23.0

# Lower-case file suffixes that preprocess_audio reads directly with soundfile;
# any other suffix is first converted to WAV via ffmpeg.
SOUNDFILE_FORMATS = {".wav", ".flac", ".ogg", ".aiff", ".aif"}
| 14 | + |
| 15 | + |
def _convert_to_wav(audio_path: Path) -> Path:
    """Convert an audio file to 16 kHz mono WAV using ffmpeg.

    The result is written to ``audio_converted.wav`` in the same directory as
    *audio_path*. An existing file with that name is overwritten (``-y``).

    Args:
        audio_path: Path to the source audio file.

    Returns:
        Path to the converted WAV file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
            The tail of ffmpeg's stderr is logged before the error is re-raised.
        OSError: If the ``ffmpeg`` executable cannot be started (for example,
            it is not installed or not on ``PATH``).
    """
    wav_path = audio_path.parent / "audio_converted.wav"
    command = [
        "ffmpeg",
        "-y",
        "-i",
        str(audio_path),
        "-ar",
        "16000",
        "-ac",
        "1",
        str(wav_path),
    ]
    try:
        subprocess.run(
            command,
            check=True,
            capture_output=True,
            # ffmpeg reads interactive commands from stdin by default; detach it
            # so the conversion cannot consume or block on the parent's stdin.
            stdin=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError as exc:
        # stderr is captured, so without this the reason for the failure would
        # be invisible. ffmpeg prints its banner first and the actual error
        # last, so only the tail is logged.
        stderr_text = (exc.stderr or b"").decode("utf-8", errors="replace").strip()
        logger.error(
            "ffmpeg failed (exit code %s) while converting %s: %s",
            exc.returncode,
            audio_path.name,
            stderr_text[-2000:],
        )
        raise
    return wav_path
| 25 | + |
| 26 | + |
def preprocess_audio(audio_path: Path) -> Path:
    """Apply high-pass filtering, noise reduction and loudness normalization.

    Processing steps:

    1. Files whose suffix is not in ``SOUNDFILE_FORMATS`` are first converted
       to WAV with ``_convert_to_wav``; the intermediate file is deleted as
       soon as it has been read, even if reading fails.
    2. Multi-channel audio is averaged down to mono.
    3. A 4th-order Butterworth high-pass filter at ``HIGHPASS_CUTOFF_HZ``.
    4. Non-stationary noise reduction with ``prop_decrease`` set to
       ``NOISE_PROP_DECREASE``.
    5. Loudness normalization to ``TARGET_LUFS``. This step is skipped (with a
       warning) when the loudness cannot be measured: the clip is shorter than
       the meter's block size, or the measured loudness is infinite (silence).

    Args:
        audio_path: Path to the input audio file.

    Returns:
        Path to ``audio_preprocessed.wav``, written to the same directory as
        *audio_path* at the sample rate of the audio that was read.

    Raises:
        subprocess.CalledProcessError: If the ffmpeg conversion fails.
        Exception: Errors raised by soundfile, scipy, noisereduce or
            pyloudnorm propagate unchanged.
    """
    # Heavy optional dependencies are imported lazily, and all up front so a
    # missing package fails before any intermediate file is created.
    import noisereduce as nr
    import numpy as np
    import pyloudnorm as pyln
    import soundfile as sf
    from scipy.signal import butter, sosfilt

    logger.info("Preprocessing audio: %s", audio_path.name)

    converted_path = None
    if audio_path.suffix.lower() not in SOUNDFILE_FORMATS:
        logger.info("Converting %s to WAV via ffmpeg", audio_path.suffix)
        converted_path = _convert_to_wav(audio_path)
        read_path = converted_path
    else:
        read_path = audio_path

    try:
        # str() keeps this call consistent with sf.write below and works with
        # soundfile versions that do not accept path-like objects.
        data, sample_rate = sf.read(str(read_path), dtype="float64")
    finally:
        # The samples are fully loaded into memory at this point, so the
        # intermediate WAV is no longer needed. Removing it here guarantees it
        # does not leak when a later processing step raises.
        if converted_path is not None and converted_path.exists():
            converted_path.unlink()

    # Convert multi-channel audio to mono by averaging the channels.
    if data.ndim > 1:
        data = np.mean(data, axis=1)

    # 1. High-pass filter (4th-order Butterworth, second-order sections).
    sos = butter(4, HIGHPASS_CUTOFF_HZ, btype="high", fs=sample_rate, output="sos")
    data = sosfilt(sos, data)

    # 2. Noise reduction (conservative).
    data = nr.reduce_noise(
        y=data,
        sr=sample_rate,
        prop_decrease=NOISE_PROP_DECREASE,
        stationary=False,
    )

    # 3. Loudness normalization to TARGET_LUFS.
    meter = pyln.Meter(sample_rate)
    loudness = None
    # The meter rejects audio shorter than one measurement block; treat such
    # clips like silence instead of failing the whole preprocessing run.
    if data.shape[0] >= meter.block_size * sample_rate:
        loudness = meter.integrated_loudness(data)

    if loudness is None or np.isinf(loudness):
        logger.warning(
            "Skipping loudness normalization for %s (audio too short or silent)",
            audio_path.name,
        )
    else:
        data = pyln.normalize.loudness(data, loudness, TARGET_LUFS)

    # Save the preprocessed copy next to the original file.
    output_path = audio_path.parent / "audio_preprocessed.wav"
    sf.write(str(output_path), data, sample_rate)

    logger.info("Preprocessed audio saved: %s", output_path.name)
    return output_path
0 commit comments