Skip to content

Commit c1ff862

Browse files
Merge pull request #125 from qanastek/main
Make the acoustics simulation modular
2 parents 54d8ec8 + 7e8b001 commit c1ff862

File tree

6 files changed

+262
-38
lines changed

6 files changed

+262
-38
lines changed

src/sdialog/audio/pipeline.py

Lines changed: 64 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -52,18 +52,19 @@
5252
from tqdm import tqdm
5353
import soundfile as sf
5454
from datasets import load_dataset
55-
from typing import List, Optional, Union, Callable
55+
from typing import List, Optional, Union, Callable, Any
5656

5757
from sdialog import Dialog
5858
from sdialog.audio.utils import logger
5959
from sdialog.audio.dialog import AudioDialog
6060
from sdialog.audio.processing import AudioProcessor
61+
from sdialog.audio import generate_utterances_audios
6162
from sdialog.audio.normalizers import normalize_audio
6263
from sdialog.audio.jsalt import MedicalRoomGenerator, RoomRole
6364
from sdialog.audio.room import Room, RoomPosition, DirectivityType
6465
from sdialog.audio.tts import BaseTTS, Qwen3TTS, Qwen3TTSVoiceClone
66+
from sdialog.audio.room_acoustics_backends import resolve_room_acoustics_backend
6567
from sdialog.audio.voice_database import Voice, BaseVoiceDatabase, HuggingfaceVoiceDatabase
66-
from sdialog.audio import generate_utterances_audios, generate_audio_room_accoustic
6768
from sdialog.audio.impulse_response_database import ImpulseResponseDatabase, RecordingDevice
6869
from sdialog.audio.utils import (
6970
Role,
@@ -108,7 +109,9 @@ def to_audio(
108109
remove_silences: Optional[bool] = True,
109110
normalize: Optional[bool] = True,
110111
callback_mix_fn: Optional[Callable] = None,
111-
callback_mix_kwargs: dict = {}
112+
callback_mix_kwargs: dict = {},
113+
room_acoustics_backend: Optional[Any] = None,
114+
room_acoustics_backend_kwargs: Optional[dict] = None,
112115
) -> AudioDialog:
113116
"""
114117
Convert a dialogue into an audio dialogue with comprehensive audio processing.
@@ -191,6 +194,13 @@ def to_audio(
191194
:type callback_mix_fn: Optional[Callable]
192195
:param callback_mix_kwargs: Keyword arguments for the callback function.
193196
:type callback_mix_kwargs: dict
197+
:param room_acoustics_backend: Backend used in step 3 for room acoustics simulation.
198+
Supports None (defaults to PyroomAcousticsBackend),
199+
a backend class/instance, or an object exposing simulate(...).
200+
:type room_acoustics_backend: Optional[Any]
201+
:param room_acoustics_backend_kwargs: Optional kwargs used to instantiate/configure
202+
the room acoustics backend.
203+
:type room_acoustics_backend_kwargs: Optional[dict]
194204
:return: Audio dialogue with processed audio data.
195205
:rtype: AudioDialog
196206
"""
@@ -272,26 +282,35 @@ def to_audio(
272282

273283
if perform_room_acoustics:
274284

285+
# Resolve the room acoustics backend
286+
_acoustics_backend = resolve_room_acoustics_backend(
287+
room_acoustics_backend,
288+
room_acoustics_backend_kwargs
289+
)
290+
275291
# Place the speakers around the furniture in the room
276-
for _role, _kwargs in speaker_positions.items():
292+
if isinstance(room, Room):
277293

278-
if _role in room.speakers_positions:
279-
continue
294+
for _role, _kwargs in speaker_positions.items():
280295

281-
room.place_speaker_around_furniture(
282-
speaker_name=_role,
283-
furniture_name=_kwargs["furniture_name"],
284-
max_distance=_kwargs["max_distance"],
285-
side=_kwargs["side"]
286-
)
296+
if _role in room.speakers_positions:
297+
continue
298+
299+
room.place_speaker_around_furniture(
300+
speaker_name=_role,
301+
furniture_name=_kwargs["furniture_name"],
302+
max_distance=_kwargs["max_distance"],
303+
side=_kwargs["side"]
304+
)
287305

288306
_environment = {
289307
"room": room,
290308
"background_effect": background_effect,
291309
"foreground_effect": foreground_effect,
292310
"foreground_effect_position": foreground_effect_position,
293311
"source_volumes": source_volumes,
294-
"kwargs_pyroom": kwargs_pyroom
312+
"kwargs_pyroom": kwargs_pyroom,
313+
"room_acoustics_backend": _acoustics_backend
295314
}
296315

297316
else:
@@ -318,6 +337,8 @@ def to_audio(
318337
normalize=normalize,
319338
callback_mix_fn=callback_mix_fn,
320339
callback_mix_kwargs=callback_mix_kwargs,
340+
room_acoustics_backend=room_acoustics_backend,
341+
room_acoustics_backend_kwargs=room_acoustics_backend_kwargs,
321342
)
322343

323344
finally:
@@ -547,7 +568,9 @@ def inference(
547568
remove_silences: Optional[bool] = True,
548569
normalize: Optional[bool] = True,
549570
callback_mix_fn: Optional[Callable] = None,
550-
callback_mix_kwargs: dict = {}
571+
callback_mix_kwargs: dict = {},
572+
room_acoustics_backend: Optional[Any] = None,
573+
room_acoustics_backend_kwargs: Optional[dict] = None,
551574
) -> AudioDialog:
552575
"""
553576
Execute the complete audio generation pipeline.
@@ -607,6 +630,13 @@ def inference(
607630
:type callback_mix_fn: Optional[Callable]
608631
:param callback_mix_kwargs: Keyword arguments for the callback function.
609632
:type callback_mix_kwargs: dict
633+
:param room_acoustics_backend: Backend used in step 3 for room acoustics simulation.
634+
Supports None (defaults to PyroomAcousticsBackend),
635+
a backend class/instance, or an object exposing simulate(...).
636+
:type room_acoustics_backend: Optional[Any]
637+
:param room_acoustics_backend_kwargs: Optional kwargs used to instantiate/configure
638+
the room acoustics backend.
639+
:type room_acoustics_backend_kwargs: Optional[dict]
610640
:return: Processed audio dialogue with all audio data.
611641
:rtype: AudioDialog
612642
@@ -649,8 +679,14 @@ def inference(
649679
else:
650680
logger.info(f"[Initialization] Audio file format for generation is set to {audio_file_format}")
651681

682+
_env_backend = environment.get("room_acoustics_backend") if environment is not None else None
683+
_backend = resolve_room_acoustics_backend(
684+
room_acoustics_backend if room_acoustics_backend is not None else _env_backend,
685+
room_acoustics_backend_kwargs
686+
)
687+
652688
# Create variables from room from the environment
653-
room: Room = (
689+
room: Any = (
654690
environment["room"]
655691
if environment is not None
656692
and "room" in environment
@@ -664,6 +700,8 @@ def inference(
664700
and environment["kwargs_pyroom"] is not None
665701
and "ray_tracing" in environment["kwargs_pyroom"]
666702
and environment["kwargs_pyroom"]["ray_tracing"]
703+
and isinstance(room, Room)
704+
and _backend.name == "pyroom"
667705
and room.directivity_type is not None
668706
and room.directivity_type != DirectivityType.OMNIDIRECTIONAL
669707
):
@@ -844,8 +882,8 @@ def inference(
844882

845883
logger.info("[Step 3] Starting...")
846884

847-
if not isinstance(environment["room"], Room):
848-
raise ValueError("The room must be a Room object")
885+
if _backend.requires_room and room is None:
886+
raise ValueError(f"The selected acoustics backend '{_backend.name}' requires a room object.")
849887

850888
# Check if the step 2 is not done
851889
if len(dialog.audio_step_2_filepath) < 1:
@@ -864,34 +902,23 @@ def inference(
864902
logger.info(f"[Step 3] Generating room accoustic for dialogue {dialog.id}")
865903

866904
# Override the room name if provided otherwise use the hash of the room
867-
room_name = room_name if room_name is not None else room.name
905+
room_name = (
906+
room_name
907+
if room_name is not None
908+
else (room.name if isinstance(room, Room) else _backend.name)
909+
)
868910

869-
# Generate the audio room accoustic from the dialog and room object
870-
dialog: AudioDialog = generate_audio_room_accoustic(
911+
# Generate step-3 audio using the selected acoustics backend.
912+
dialog: AudioDialog = _backend.simulate(
871913
dialog=dialog,
872914
room=room,
873915
dialog_directory=dialog_directory,
874916
room_name=room_name,
875-
kwargs_pyroom=environment["kwargs_pyroom"] if "kwargs_pyroom" in environment else {},
876-
source_volumes=environment["source_volumes"] if "source_volumes" in environment else {},
877917
audio_file_format=audio_file_format,
878-
background_effect=(
879-
environment["background_effect"]
880-
if "background_effect" in environment
881-
else "white_noise"
882-
),
883-
foreground_effect=(
884-
environment["foreground_effect"]
885-
if "foreground_effect" in environment
886-
else "ac_noise_minimal"
887-
),
888-
foreground_effect_position=(
889-
environment["foreground_effect_position"]
890-
if "foreground_effect_position" in environment
891-
else RoomPosition.TOP_RIGHT
892-
),
918+
environment=environment,
893919
callback_mix_fn=callback_mix_fn,
894920
callback_mix_kwargs=callback_mix_kwargs,
921+
sampling_rate=self.sampling_rate,
895922
)
896923

897924
logger.info(f"[Step 3] Room accoustic has been generated successfully for dialogue {dialog.id}!")
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
"""
2+
Room acoustics backend package.
3+
4+
This package exposes room acoustics backend contracts, built-in backends,
5+
and the backend resolver utility.
6+
"""
7+
8+
from .base import BaseRoomAcousticsBackend
9+
from .pyroomacoustics import PyroomAcousticsBackend
10+
from .resolver import resolve_room_acoustics_backend
11+
12+
__all__ = [
13+
"BaseRoomAcousticsBackend",
14+
"PyroomAcousticsBackend",
15+
"resolve_room_acoustics_backend",
16+
]
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""
2+
Base room acoustics backend contract.
3+
"""
4+
5+
from abc import ABC, abstractmethod
6+
from typing import Any, Callable, Optional
7+
8+
from sdialog.audio.dialog import AudioDialog
9+
10+
11+
class BaseRoomAcousticsBackend(ABC):
    """
    Contract shared by all room acoustics backends.

    A concrete backend subclasses this class, overrides the two class-level
    attributes below as needed, and implements :meth:`simulate`.
    """

    # Short identifier of the backend; subclasses are expected to override it.
    name: str = "base"
    # Whether the backend needs a room object in order to run.
    requires_room: bool = True

    @abstractmethod
    def simulate(
        self,
        dialog: AudioDialog,
        room: Optional[Any],
        dialog_directory: str,
        room_name: str,
        audio_file_format: str = "wav",
        environment: Optional[dict] = None,
        callback_mix_fn: Optional[Callable] = None,
        callback_mix_kwargs: Optional[dict] = None,
        sampling_rate: int = 44_100,
    ) -> AudioDialog:
        """
        Apply the room acoustics simulation to a dialog and return it.

        This method is abstract: the base class provides no implementation,
        so every concrete backend must define its own.

        :param dialog: Audio dialog whose outputs are to be updated.
        :type dialog: AudioDialog
        :param room: Room configuration for the simulation (may be ``None``).
        :type room: Optional[Any]
        :param dialog_directory: Relative directory in which generated files are stored.
        :type dialog_directory: str
        :param room_name: Name of the room profile being generated.
        :type room_name: str
        :param audio_file_format: Format of the exported audio files (default: "wav").
        :type audio_file_format: str
        :param environment: Parameters specific to the backend.
        :type environment: Optional[dict]
        :param callback_mix_fn: Optional callback invoked while mixing audio.
        :type callback_mix_fn: Optional[Callable]
        :param callback_mix_kwargs: Optional keyword arguments for the mix callback.
        :type callback_mix_kwargs: Optional[dict]
        :param sampling_rate: Sampling rate of the generated audio (default: 44100).
        :type sampling_rate: int
        :return: The dialog updated with the room acoustics outputs.
        :rtype: AudioDialog
        """
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
"""
2+
Pyroomacoustics backend implementation.
3+
"""
4+
5+
from typing import Any, Callable, Optional
6+
7+
from sdialog.audio.dialog import AudioDialog
8+
from sdialog.audio.room import Room, RoomPosition
9+
10+
from .base import BaseRoomAcousticsBackend
11+
12+
13+
class PyroomAcousticsBackend(BaseRoomAcousticsBackend):
    """
    Backend that delegates to the existing pyroomacoustics generation flow.
    """

    name = "pyroom"
    requires_room = True

    def simulate(
        self,
        dialog: AudioDialog,
        room: Optional[Any],
        dialog_directory: str,
        room_name: str,
        audio_file_format: str = "wav",
        environment: Optional[dict] = None,
        callback_mix_fn: Optional[Callable] = None,
        callback_mix_kwargs: Optional[dict] = None,
        sampling_rate: int = 44_100,
    ) -> AudioDialog:
        """
        Produce the room acoustics audio for a dialog through pyroomacoustics.

        The pyroom-specific settings are read from ``environment``; any key
        that is absent falls back to a built-in default.

        :param dialog: Audio dialog whose outputs are to be updated.
        :type dialog: AudioDialog
        :param room: Room configuration; must be a ``Room`` instance.
        :type room: Optional[Any]
        :param dialog_directory: Relative directory in which generated files are stored.
        :type dialog_directory: str
        :param room_name: Name of the room profile being generated.
        :type room_name: str
        :param audio_file_format: Format of the exported audio files (default: "wav").
        :type audio_file_format: str
        :param environment: Optional overrides for the pyroom settings. Keys read:
                            ``kwargs_pyroom``, ``source_volumes``, ``background_effect``,
                            ``foreground_effect`` and ``foreground_effect_position``.
        :type environment: Optional[dict]
        :param callback_mix_fn: Optional callback invoked while mixing audio.
        :type callback_mix_fn: Optional[Callable]
        :param callback_mix_kwargs: Optional keyword arguments for the mix callback.
        :type callback_mix_kwargs: Optional[dict]
        :param sampling_rate: Ignored by this backend; accepted for API compatibility.
        :type sampling_rate: int
        :return: The dialog updated with the room acoustics outputs.
        :rtype: AudioDialog
        :raises ValueError: If ``room`` is not an instance of ``Room``.
        """
        # This backend never uses the sampling rate; drop the local name so it
        # cannot be picked up by accident further down.
        del sampling_rate

        if not isinstance(room, Room):
            raise ValueError("PyroomAcousticsBackend expects `room` to be an instance of `Room`.")

        # NOTE(review): imported at call time rather than at module level,
        # presumably to avoid a circular import with ``sdialog.audio`` (confirm).
        from sdialog.audio import generate_audio_room_accoustic

        # A missing (or empty) environment means "use every default".
        settings = environment if environment else {}
        mix_kwargs = callback_mix_kwargs if callback_mix_kwargs else {}

        pyroom_kwargs = settings.get("kwargs_pyroom", {})
        volumes = settings.get("source_volumes", {})
        background = settings.get("background_effect", "white_noise")
        foreground = settings.get("foreground_effect", "ac_noise_minimal")
        foreground_position = settings.get("foreground_effect_position", RoomPosition.TOP_RIGHT)

        return generate_audio_room_accoustic(
            dialog=dialog,
            room=room,
            dialog_directory=dialog_directory,
            room_name=room_name,
            kwargs_pyroom=pyroom_kwargs,
            source_volumes=volumes,
            audio_file_format=audio_file_format,
            background_effect=background,
            foreground_effect=foreground,
            foreground_effect_position=foreground_position,
            callback_mix_fn=callback_mix_fn,
            callback_mix_kwargs=mix_kwargs,
        )

0 commit comments

Comments
 (0)