|
1 | 1 | import os |
2 | | -import warnings |
3 | 2 | from dataclasses import dataclass, field |
4 | 3 | from io import BytesIO |
5 | 4 | from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Union |
6 | 5 |
|
7 | 6 | import numpy as np |
8 | 7 | import pyarrow as pa |
9 | | -from packaging import version |
10 | 8 |
|
11 | 9 | from .. import config |
12 | | -from ..download.streaming_download_manager import xopen |
| 10 | +from ..download.streaming_download_manager import xopen, xsplitext |
13 | 11 | from ..table import array_cast |
14 | 12 | from ..utils.py_utils import no_op_if_value_is_null, string_to_dict |
15 | 13 |
|
@@ -150,20 +148,47 @@ def decode_example( |
150 | 148 | path, file = (value["path"], BytesIO(value["bytes"])) if value["bytes"] is not None else (value["path"], None) |
151 | 149 | if path is None and file is None: |
152 | 150 | raise ValueError(f"An audio sample should have one of 'path' or 'bytes' but both are None in {value}.") |
153 | | - elif path is not None and path.endswith("mp3"): |
154 | | - array, sampling_rate = self._decode_mp3(file if file else path) |
155 | | - elif path is not None and path.endswith("opus"): |
156 | | - if file: |
157 | | - array, sampling_rate = self._decode_non_mp3_file_like(file, "opus") |
158 | | - else: |
159 | | - array, sampling_rate = self._decode_non_mp3_path_like( |
160 | | - path, "opus", token_per_repo_id=token_per_repo_id |
161 | | - ) |
| 151 | + |
| 152 | + try: |
| 153 | + import librosa |
| 154 | + import soundfile as sf |
| 155 | + except ImportError as err: |
| 156 | + raise ImportError("To support decoding audio files, please install 'librosa' and 'soundfile'.") from err |
| 157 | + |
| 158 | + audio_format = xsplitext(path)[1][1:].lower() if path is not None else None |
| 159 | + if not config.IS_OPUS_SUPPORTED and audio_format == "opus": |
| 160 | + raise RuntimeError( |
| 161 | + "Decoding 'opus' files requires system library 'libsndfile'>=1.0.31, " |
| 162 | + 'You can try to update `soundfile` python library: `pip install "soundfile>=0.12.1"`. ' |
| 163 | + ) |
| 164 | + elif not config.IS_MP3_SUPPORTED and audio_format == "mp3": |
| 165 | + raise RuntimeError( |
| 166 | + "Decoding 'mp3' files requires system library 'libsndfile'>=1.1.0, " |
| 167 | + 'You can try to update `soundfile` python library: `pip install "soundfile>=0.12.1"`. ' |
| 168 | + ) |
| 169 | + |
| 170 | + if file is None: |
| 171 | + token_per_repo_id = token_per_repo_id or {} |
| 172 | + source_url = path.split("::")[-1] |
| 173 | + try: |
| 174 | + repo_id = string_to_dict(source_url, config.HUB_DATASETS_URL)["repo_id"] |
| 175 | + use_auth_token = token_per_repo_id[repo_id] |
| 176 | + except (ValueError, KeyError): |
| 177 | + use_auth_token = None |
| 178 | + |
| 179 | + with xopen(path, "rb", use_auth_token=use_auth_token) as f: |
| 180 | + array, sampling_rate = sf.read(f) |
| 181 | + |
162 | 182 | else: |
163 | | - if file: |
164 | | - array, sampling_rate = self._decode_non_mp3_file_like(file) |
165 | | - else: |
166 | | - array, sampling_rate = self._decode_non_mp3_path_like(path, token_per_repo_id=token_per_repo_id) |
| 183 | + array, sampling_rate = sf.read(file) |
| 184 | + |
| 185 | + array = array.T |
| 186 | + if self.mono: |
| 187 | + array = librosa.to_mono(array) |
| 188 | + if self.sampling_rate and self.sampling_rate != sampling_rate: |
| 189 | + array = librosa.resample(array, orig_sr=sampling_rate, target_sr=self.sampling_rate) |
| 190 | + sampling_rate = self.sampling_rate |
| 191 | + |
167 | 192 | return {"path": path, "array": array, "sampling_rate": sampling_rate} |
168 | 193 |
|
169 | 194 | def flatten(self) -> Union["FeatureType", Dict[str, "FeatureType"]]: |
@@ -242,129 +267,3 @@ def path_to_bytes(path): |
242 | 267 | ) |
243 | 268 | storage = pa.StructArray.from_arrays([bytes_array, path_array], ["bytes", "path"], mask=bytes_array.is_null()) |
244 | 269 | return array_cast(storage, self.pa_type) |
245 | | - |
def _decode_non_mp3_path_like(
    self, path, format=None, token_per_repo_id: Optional[Dict[str, Union[str, bool, None]]] = None
):
    """Decode a non-mp3 audio file addressed by a path or URL.

    The file is opened with ``xopen`` and handed to ``librosa.load`` together
    with ``self.sampling_rate`` and ``self.mono``.

    Args:
        path: Location of the audio file. Only the segment after the last
            ``"::"`` is used to look up the Hub repository.
        format: Pass ``"opus"`` to additionally verify that the installed
            ``libsndfile`` is recent enough before decoding.
        token_per_repo_id: Optional mapping from repository id to the auth
            token to use when opening files of that repository.

    Returns:
        The ``(array, sampling_rate)`` pair produced by ``librosa.load``.

    Raises:
        ImportError: If ``librosa`` (or, for opus, ``soundfile``) is missing.
        RuntimeError: If ``format == "opus"`` and ``libsndfile`` is older
            than 1.0.30.
    """
    try:
        import librosa
    except ImportError as err:
        raise ImportError("To support decoding audio files, please install 'librosa'.") from err

    token_per_repo_id = token_per_repo_id or {}
    if format == "opus":
        # Guard this import like the librosa one so that a missing package
        # produces an actionable message instead of a bare ImportError.
        try:
            import soundfile
        except ImportError as err:
            raise ImportError("To support decoding 'opus' audio files, please install 'soundfile'.") from err

        if version.parse(soundfile.__libsndfile_version__) < version.parse("1.0.30"):
            # The requirement spec is quoted: unquoted, a shell would parse
            # `>=1.0.30` as an output redirection when the command is pasted.
            raise RuntimeError(
                "Decoding .opus files requires 'libsndfile'>=1.0.30, "
                'it can be installed via conda: `conda install -c conda-forge "libsndfile>=1.0.30"`'
            )

    # Chained URLs use "::" as separator; the last segment is the one
    # matched against the Hub dataset URL pattern.
    source_url = path.split("::")[-1]
    try:
        repo_id = string_to_dict(source_url, config.HUB_DATASETS_URL)["repo_id"]
        use_auth_token = token_per_repo_id[repo_id]
    except (ValueError, KeyError):
        # Either the URL could not be parsed into a repo id or no token is
        # registered for that repo: open the file without authentication.
        use_auth_token = None

    with xopen(path, "rb", use_auth_token=use_auth_token) as f:
        array, sampling_rate = librosa.load(f, sr=self.sampling_rate, mono=self.mono)
    return array, sampling_rate
273 | | - |
def _decode_non_mp3_file_like(self, file, format=None):
    """Decode a non-mp3 audio stream from an in-memory file-like object.

    The data is read with ``soundfile.read``, transposed, optionally
    down-mixed with ``librosa.to_mono`` (when ``self.mono`` is truthy) and
    optionally resampled with ``librosa.resample`` (when ``self.sampling_rate``
    is set and differs from the rate reported by ``soundfile``).

    Args:
        file: File-like object accepted by ``soundfile.read``.
        format: Pass ``"opus"`` to verify the ``libsndfile`` version first.

    Returns:
        A ``(array, sampling_rate)`` tuple.

    Raises:
        ImportError: If ``librosa`` or ``soundfile`` is not installed.
        RuntimeError: If ``format == "opus"`` and ``libsndfile`` is older
            than 1.0.30.
    """
    try:
        import librosa
        import soundfile as sf
    except ImportError as err:
        raise ImportError("To support decoding audio files, please install 'librosa' and 'soundfile'.") from err

    # Opus decoding needs a sufficiently recent libsndfile.
    if format == "opus" and version.parse(sf.__libsndfile_version__) < version.parse("1.0.30"):
        raise RuntimeError(
            "Decoding .opus files requires 'libsndfile'>=1.0.30, "
            'it can be installed via conda: `conda install -c conda-forge "libsndfile>=1.0.30"`'
        )

    samples, native_rate = sf.read(file)
    # Transpose before handing the data to librosa
    # (presumably frames-first -> channels-first; confirm against soundfile docs).
    samples = samples.T
    if self.mono:
        samples = librosa.to_mono(samples)

    target_rate = self.sampling_rate
    if not target_rate or target_rate == native_rate:
        # No target rate requested, or the file already matches it.
        return samples, native_rate

    resampled = librosa.resample(samples, orig_sr=native_rate, target_sr=target_rate)
    return resampled, target_rate
295 | | - |
def _decode_mp3(self, path_or_file):
    """Decode an mp3 file, preferring ``torchaudio`` and falling back to ``librosa``.

    * ``torchaudio < 0.12.0``: select the ``sox_io`` backend and decode with
      ``torchaudio``.
    * ``torchaudio >= 0.12.0``: try ``torchaudio`` first; if that raises
      ``RuntimeError``, warn once per process and decode with ``librosa``.

    Args:
        path_or_file: Path or file-like object forwarded to the decoders.

    Returns:
        A ``(array, sampling_rate)`` tuple.

    Raises:
        ImportError: If ``torchaudio`` is missing, if the ``sox_io`` backend
            cannot be selected, or if the ``librosa`` fallback is needed but
            ``librosa`` is not installed.
        RuntimeError: If the ``librosa`` fallback itself fails to decode.
    """
    # Module-level flags used to emit each warning only once.
    global _ffmpeg_warned, _librosa_warned

    try:
        import torchaudio
    except ImportError as err:
        raise ImportError("To support decoding 'mp3' audio files, please install 'torchaudio'.") from err

    if version.parse(torchaudio.__version__) < version.parse("0.12.0"):
        try:
            torchaudio.set_audio_backend("sox_io")
        except RuntimeError as err:
            raise ImportError("To support decoding 'mp3' audio files, please install 'sox'.") from err
        array, sampling_rate = self._decode_mp3_torchaudio(path_or_file)
    else:
        try:  # try torchaudio anyway because sometimes it works (depending on the os and os packages installed)
            array, sampling_rate = self._decode_mp3_torchaudio(path_or_file)
        except RuntimeError:
            if not _ffmpeg_warned:
                warnings.warn(
                    "\nTo support 'mp3' decoding with `torchaudio>=0.12.0`, make sure you have `ffmpeg` system package with at least version 4 installed. "
                    "Alternatively, you can downgrade `torchaudio`:\n\n"
                    "\tpip install \"torchaudio<0.12\".\n\nOtherwise 'mp3' files will be decoded with `librosa`."
                )
                _ffmpeg_warned = True
            try:
                # Imported only to check availability. A line-level noqa is used
                # because `# flake8: noqa` would disable linting for the whole file.
                import librosa  # noqa: F401
            except ImportError as err:
                # Same wording as the warning above, including the sentence that
                # introduces the `pip install "torchaudio<0.12"` command.
                raise ImportError(
                    "\nTo support 'mp3' decoding with `torchaudio>=0.12.0`, make sure you have `ffmpeg` system package with at least version 4 installed. "
                    "Alternatively, you can downgrade `torchaudio`:\n\n"
                    "\tpip install \"torchaudio<0.12\".\n\nTo decode 'mp3' files without `torchaudio`, please install `librosa`:\n\n"
                    "\tpip install librosa\n\nNote that decoding might be extremely slow in that case."
                ) from err
            # try to decode with librosa for torchaudio>=0.12.0 as a workaround
            if not _librosa_warned:
                warnings.warn("Decoding mp3 with `librosa` instead of `torchaudio`, decoding might be slow.")
                _librosa_warned = True
            try:
                array, sampling_rate = self._decode_mp3_librosa(path_or_file)
            except RuntimeError as err:
                raise RuntimeError(
                    "Decoding of 'mp3' failed, probably because of streaming mode "
                    "(`librosa` cannot decode 'mp3' file-like objects, only path-like)."
                ) from err

    return array, sampling_rate
342 | | - |
def _decode_mp3_torchaudio(self, path_or_file):
    """Decode an mp3 with ``torchaudio`` and apply resampling / mono mixing.

    A ``torchaudio.transforms.Resample`` instance is cached on ``self`` as
    ``_resampler`` and reused while both the source rate and the target rate
    stay the same.

    Args:
        path_or_file: Path or file-like object passed to ``torchaudio.load``.

    Returns:
        A ``(array, sampling_rate)`` tuple where ``array`` is a NumPy array.
    """
    import torchaudio
    import torchaudio.transforms as T

    array, sampling_rate = torchaudio.load(path_or_file, format="mp3")
    if self.sampling_rate and self.sampling_rate != sampling_rate:
        resampler = getattr(self, "_resampler", None)
        # Rebuild the cached resampler when either end of the conversion
        # changed. Checking only the source rate would keep resampling to a
        # stale target after `self.sampling_rate` has been modified.
        if (
            resampler is None
            or resampler.orig_freq != sampling_rate
            or resampler.new_freq != self.sampling_rate
        ):
            resampler = T.Resample(sampling_rate, self.sampling_rate)
            self._resampler = resampler
        array = resampler(array)
        sampling_rate = self.sampling_rate
    array = array.numpy()
    if self.mono:
        # Average over the first axis (assumed to be the channel axis of the
        # tensor returned by torchaudio.load; confirm against torchaudio docs).
        array = array.mean(axis=0)
    return array, sampling_rate
357 | | - |
def _decode_mp3_librosa(self, path_or_file):
    """Decode an mp3 with ``librosa.load``, forwarding ``self.mono`` and ``self.sampling_rate``.

    On the first call the module-level ``_audioread_warned`` flag is set and
    warnings are left untouched; on later calls ``UserWarning`` messages
    matching ``"pysoundfile failed.+?"`` from the ``librosa`` module are
    ignored for the duration of the load.

    Args:
        path_or_file: Path or file-like object passed to ``librosa.load``.

    Returns:
        The ``(array, sampling_rate)`` pair produced by ``librosa.load``.
    """
    global _audioread_warned

    import librosa

    # The filter is installed inside catch_warnings so it is undone on exit.
    with warnings.catch_warnings():
        already_warned = _audioread_warned
        if not already_warned:
            _audioread_warned = True
        else:
            warnings.filterwarnings(
                "ignore",
                message="pysoundfile failed.+?",
                category=UserWarning,
                module=librosa.__name__,
            )
        samples, rate = librosa.load(path_or_file, mono=self.mono, sr=self.sampling_rate)

    return samples, rate
0 commit comments