5252from tqdm import tqdm
5353import soundfile as sf
5454from datasets import load_dataset
55- from typing import List , Optional , Union , Callable
55+ from typing import List , Optional , Union , Callable , Any
5656
5757from sdialog import Dialog
5858from sdialog .audio .utils import logger
5959from sdialog .audio .dialog import AudioDialog
6060from sdialog .audio .processing import AudioProcessor
61+ from sdialog .audio import generate_utterances_audios
6162from sdialog .audio .normalizers import normalize_audio
6263from sdialog .audio .jsalt import MedicalRoomGenerator , RoomRole
6364from sdialog .audio .room import Room , RoomPosition , DirectivityType
6465from sdialog .audio .tts import BaseTTS , Qwen3TTS , Qwen3TTSVoiceClone
66+ from sdialog .audio .room_acoustics_backends import resolve_room_acoustics_backend
6567from sdialog .audio .voice_database import Voice , BaseVoiceDatabase , HuggingfaceVoiceDatabase
66- from sdialog .audio import generate_utterances_audios , generate_audio_room_accoustic
6768from sdialog .audio .impulse_response_database import ImpulseResponseDatabase , RecordingDevice
6869from sdialog .audio .utils import (
6970 Role ,
@@ -108,7 +109,9 @@ def to_audio(
108109 remove_silences : Optional [bool ] = True ,
109110 normalize : Optional [bool ] = True ,
110111 callback_mix_fn : Optional [Callable ] = None ,
111- callback_mix_kwargs : dict = {}
112+ callback_mix_kwargs : dict = {},
113+ room_acoustics_backend : Optional [Any ] = None ,
114+ room_acoustics_backend_kwargs : Optional [dict ] = None ,
112115) -> AudioDialog :
113116 """
114117 Convert a dialogue into an audio dialogue with comprehensive audio processing.
@@ -191,6 +194,13 @@ def to_audio(
191194 :type callback_mix_fn: Optional[Callable]
192195 :param callback_mix_kwargs: Keyword arguments for the callback function.
193196 :type callback_mix_kwargs: dict
197+ :param room_acoustics_backend: Backend used in step 3 for room acoustics simulation.
198+ Supports None (defaults to PyroomAcousticsBackend),
199+ a backend class/instance, or an object exposing simulate(...).
200+ :type room_acoustics_backend: Optional[Any]
201+ :param room_acoustics_backend_kwargs: Optional kwargs used to instantiate/configure
202+ the room acoustics backend.
203+ :type room_acoustics_backend_kwargs: Optional[dict]
194204 :return: Audio dialogue with processed audio data.
195205 :rtype: AudioDialog
196206 """
@@ -272,26 +282,35 @@ def to_audio(
272282
273283 if perform_room_acoustics :
274284
285+ # Resolve the room acoustics backend
286+ _acoustics_backend = resolve_room_acoustics_backend (
287+ room_acoustics_backend ,
288+ room_acoustics_backend_kwargs
289+ )
290+
275291 # Place the speakers around the furniture in the room
276- for _role , _kwargs in speaker_positions . items ( ):
292+ if isinstance ( room , Room ):
277293
278- if _role in room .speakers_positions :
279- continue
294+ for _role , _kwargs in speaker_positions .items ():
280295
281- room .place_speaker_around_furniture (
282- speaker_name = _role ,
283- furniture_name = _kwargs ["furniture_name" ],
284- max_distance = _kwargs ["max_distance" ],
285- side = _kwargs ["side" ]
286- )
296+ if _role in room .speakers_positions :
297+ continue
298+
299+ room .place_speaker_around_furniture (
300+ speaker_name = _role ,
301+ furniture_name = _kwargs ["furniture_name" ],
302+ max_distance = _kwargs ["max_distance" ],
303+ side = _kwargs ["side" ]
304+ )
287305
288306 _environment = {
289307 "room" : room ,
290308 "background_effect" : background_effect ,
291309 "foreground_effect" : foreground_effect ,
292310 "foreground_effect_position" : foreground_effect_position ,
293311 "source_volumes" : source_volumes ,
294- "kwargs_pyroom" : kwargs_pyroom
312+ "kwargs_pyroom" : kwargs_pyroom ,
313+ "room_acoustics_backend" : _acoustics_backend
295314 }
296315
297316 else :
@@ -318,6 +337,8 @@ def to_audio(
318337 normalize = normalize ,
319338 callback_mix_fn = callback_mix_fn ,
320339 callback_mix_kwargs = callback_mix_kwargs ,
340+ room_acoustics_backend = room_acoustics_backend ,
341+ room_acoustics_backend_kwargs = room_acoustics_backend_kwargs ,
321342 )
322343
323344 finally :
@@ -547,7 +568,9 @@ def inference(
547568 remove_silences : Optional [bool ] = True ,
548569 normalize : Optional [bool ] = True ,
549570 callback_mix_fn : Optional [Callable ] = None ,
550- callback_mix_kwargs : dict = {}
571+ callback_mix_kwargs : dict = {},
572+ room_acoustics_backend : Optional [Any ] = None ,
573+ room_acoustics_backend_kwargs : Optional [dict ] = None ,
551574 ) -> AudioDialog :
552575 """
553576 Execute the complete audio generation pipeline.
@@ -607,6 +630,13 @@ def inference(
607630 :type callback_mix_fn: Optional[Callable]
608631 :param callback_mix_kwargs: Keyword arguments for the callback function.
609632 :type callback_mix_kwargs: dict
633+ :param room_acoustics_backend: Backend used in step 3 for room acoustics simulation.
634+ Supports None (defaults to PyroomAcousticsBackend),
635+ a backend class/instance, or an object exposing simulate(...).
636+ :type room_acoustics_backend: Optional[Any]
637+ :param room_acoustics_backend_kwargs: Optional kwargs used to instantiate/configure
638+ the room acoustics backend.
639+ :type room_acoustics_backend_kwargs: Optional[dict]
610640 :return: Processed audio dialogue with all audio data.
611641 :rtype: AudioDialog
612642
@@ -649,8 +679,14 @@ def inference(
649679 else :
650680 logger .info (f"[Initialization] Audio file format for generation is set to { audio_file_format } " )
651681
682+ _env_backend = environment .get ("room_acoustics_backend" ) if environment is not None else None
683+ _backend = resolve_room_acoustics_backend (
684+ room_acoustics_backend if room_acoustics_backend is not None else _env_backend ,
685+ room_acoustics_backend_kwargs
686+ )
687+
652688 # Read the room object from the environment
653- room : Room = (
689+ room : Any = (
654690 environment ["room" ]
655691 if environment is not None
656692 and "room" in environment
@@ -664,6 +700,8 @@ def inference(
664700 and environment ["kwargs_pyroom" ] is not None
665701 and "ray_tracing" in environment ["kwargs_pyroom" ]
666702 and environment ["kwargs_pyroom" ]["ray_tracing" ]
703+ and isinstance (room , Room )
704+ and _backend .name == "pyroom"
667705 and room .directivity_type is not None
668706 and room .directivity_type != DirectivityType .OMNIDIRECTIONAL
669707 ):
@@ -844,8 +882,8 @@ def inference(
844882
845883 logger .info ("[Step 3] Starting..." )
846884
847- if not isinstance ( environment [ " room" ], Room ) :
848- raise ValueError ("The room must be a Room object" )
885+ if _backend . requires_room and room is None :
886+ raise ValueError (f "The selected acoustics backend ' { _backend . name } ' requires a room object. " )
849887
850888 # Check if the step 2 is not done
851889 if len (dialog .audio_step_2_filepath ) < 1 :
@@ -864,34 +902,23 @@ def inference(
864902 logger .info (f"[Step 3] Generating room accoustic for dialogue { dialog .id } " )
865903
866904 # Override the room name if provided otherwise use the hash of the room
867- room_name = room_name if room_name is not None else room .name
905+ room_name = (
906+ room_name
907+ if room_name is not None
908+ else (room .name if isinstance (room , Room ) else _backend .name )
909+ )
868910
869- # Generate the audio room accoustic from the dialog and room object
870- dialog : AudioDialog = generate_audio_room_accoustic (
911+ # Generate step-3 audio using the selected acoustics backend.
912+ dialog : AudioDialog = _backend . simulate (
871913 dialog = dialog ,
872914 room = room ,
873915 dialog_directory = dialog_directory ,
874916 room_name = room_name ,
875- kwargs_pyroom = environment ["kwargs_pyroom" ] if "kwargs_pyroom" in environment else {},
876- source_volumes = environment ["source_volumes" ] if "source_volumes" in environment else {},
877917 audio_file_format = audio_file_format ,
878- background_effect = (
879- environment ["background_effect" ]
880- if "background_effect" in environment
881- else "white_noise"
882- ),
883- foreground_effect = (
884- environment ["foreground_effect" ]
885- if "foreground_effect" in environment
886- else "ac_noise_minimal"
887- ),
888- foreground_effect_position = (
889- environment ["foreground_effect_position" ]
890- if "foreground_effect_position" in environment
891- else RoomPosition .TOP_RIGHT
892- ),
918+ environment = environment ,
893919 callback_mix_fn = callback_mix_fn ,
894920 callback_mix_kwargs = callback_mix_kwargs ,
921+ sampling_rate = self .sampling_rate ,
895922 )
896923
897924 logger .info (f"[Step 3] Room accoustic has been generated successfully for dialogue { dialog .id } !" )
0 commit comments