@@ -284,7 +284,7 @@ def _transcribe_stream(self, duration: int = 0, audio_source: BaseMicrophone | b
284284 future = None
285285
286286 try :
287- logger .info (f"Creating transcription session with model={ self .model } , language={ self .language } " )
287+ logger .debug (f"Creating transcription session with model={ self .model } , language={ self .language } " )
288288 # Create transcription session
289289 url = f"{ self .api_base_url } /transcriptions/create"
290290 data = {
@@ -365,7 +365,7 @@ def _transcribe_stream(self, duration: int = 0, audio_source: BaseMicrophone | b
365365 pass # Ignore any errors during cleanup
366366 raise
367367
368- except TimeoutError :
368+ except ( TimeoutError , asyncio . TimeoutError ) :
369369 raise
370370
371371 except Exception as e :
@@ -380,7 +380,7 @@ def _asyncio_loop(self):
380380 Dedicated thread for running the asyncio event loop.
381381 Manages transcription sessions posted via run_coroutine_threadsafe.
382382 """
383- logger .info ("Asyncio event loop starting" )
383+ logger .debug ("Asyncio event loop starting" )
384384
385385 self ._worker_loop = asyncio .new_event_loop ()
386386 asyncio .set_event_loop (self ._worker_loop )
@@ -404,7 +404,7 @@ async def keep_alive():
404404 )
405405 self ._worker_loop .close ()
406406 self ._worker_loop = None
407- logger .info ("Asyncio event loop stopped" )
407+ logger .debug ("Asyncio event loop stopped" )
408408
409409 async def _transcription_session_handler (self , session_info : MicSessionInfo | WAVSessionInfo ):
410410 """
@@ -414,7 +414,7 @@ async def _transcription_session_handler(self, session_info: MicSessionInfo | WA
414414 session_id = session_info .session_id
415415
416416 async with websockets .connect (self .ws_url ) as websocket :
417- logger .info (f"WebSocket connected for session { session_id } " )
417+ logger .debug (f"WebSocket connected for session { session_id } " )
418418
419419 # Start audio sending and transcription receiving tasks
420420 if isinstance (session_info , MicSessionInfo ):
@@ -501,7 +501,7 @@ def _mic_reader_loop(self, mic: BaseMicrophone):
501501 and publishing to its subscribers.
502502 """
503503 mic_id = id (mic )
504- logger .info (f"Audio reader thread starting for mic { mic_id } " )
504+ logger .debug (f"Audio reader thread starting for mic { mic_id } " )
505505
506506 try :
507507 while not self ._stop_worker .is_set ():
@@ -511,15 +511,15 @@ def _mic_reader_loop(self, mic: BaseMicrophone):
511511 self ._audio_stream_router .publish (mic , audio_chunk )
512512 else :
513513 # No subscribers left, exit the thread
514- logger .info (f"No more subscribers for mic { mic_id } , stopping reader thread" )
514+ logger .debug (f"No more subscribers for mic { mic_id } , stopping reader thread" )
515515 break
516516
517517 except Exception as e :
518518 logger .error (f"Audio reader thread error for mic { mic_id } : { e } " )
519519 raise
520520 finally :
521521 self ._audio_stream_router .unregister_thread (mic )
522- logger .info (f"Audio reader thread stopped for mic { mic_id } " )
522+ logger .debug (f"Audio reader thread stopped for mic { mic_id } " )
523523
524524 async def _send_wav_audio (self , websocket : websockets .ClientConnection , session_info : WAVSessionInfo ):
525525 """Sends audio chunks from WAV numpy array to WebSocket"""
@@ -529,17 +529,34 @@ async def _send_wav_audio(self, websocket: websockets.ClientConnection, session_
529529 session_id = session_info .session_id
530530 wav_audio = session_info .wav_audio
531531
532- with wave .open (io .BytesIO (wav_audio ), 'rb' ) as wf :
533- sample_rate = wf .getframerate ()
534- num_channels = wf .getnchannels ()
535- sample_width = wf .getsampwidth ()
536- frames = wf .readframes (wf .getnframes ())
537-
538- # Calculate chunk size for ~100ms of audio
539- chunk_duration = 0.1
540- chunk_size = int (chunk_duration * sample_rate * num_channels * sample_width )
541-
542532 try :
533+ # Parse WAV file to get format information and calculate header size
534+ with wave .open (io .BytesIO (wav_audio ), 'rb' ) as wf :
535+ sample_rate = wf .getframerate ()
536+ num_channels = wf .getnchannels ()
537+ sample_width = wf .getsampwidth ()
538+ num_frames = wf .getnframes ()
539+
540+ frames_size = num_frames * num_channels * sample_width
541+ header_size = len (wav_audio ) - frames_size
542+
543+ header = wav_audio [:header_size ]
544+ frames = wav_audio [header_size :]
545+
546+ header_base64 = base64 .b64encode (header ).decode ('utf-8' )
547+ header_message = {
548+ "message_type" : "transcriptions_session_audio" ,
549+ "message_source" : "audio_analytics_api" ,
550+ "session_id" : session_id ,
551+ "type" : "input_audio" ,
552+ "data" : header_base64
553+ }
554+ await websocket .send (json .dumps (header_message ))
555+
556+ # Calculate chunk size for ~100ms of audio
557+ chunk_duration = 0.1
558+ chunk_size = int (chunk_duration * sample_rate * num_channels * sample_width )
559+
543560 for i in range (0 , len (frames ), chunk_size ):
544561 iteration_start = time .perf_counter ()
545562
@@ -548,16 +565,14 @@ async def _send_wav_audio(self, websocket: websockets.ClientConnection, session_
548565
549566 audio_chunk = frames [i :i + chunk_size ]
550567 audio_base64 = base64 .b64encode (audio_chunk ).decode ('utf-8' )
551-
552- message = {
568+ chunk_message = {
553569 "message_type" : "transcriptions_session_audio" ,
554570 "message_source" : "audio_analytics_api" ,
555571 "session_id" : session_id ,
556572 "type" : "input_audio" ,
557573 "data" : audio_base64
558574 }
559-
560- await websocket .send (json .dumps (message ))
575+ await websocket .send (json .dumps (chunk_message ))
561576
562577 # Account for processing time to maintain real-time simulation
563578 elapsed = time .perf_counter () - iteration_start
@@ -567,6 +582,7 @@ async def _send_wav_audio(self, websocket: websockets.ClientConnection, session_
567582 except asyncio .CancelledError :
568583 logger .debug (f"Array audio sending cancelled for session { session_id } " )
569584 raise
585+
570586 except Exception as e :
571587 logger .error (f"Error sending array audio for session { session_id } : { e } " )
572588 raise
@@ -619,6 +635,7 @@ async def _receive_transcription(self, websocket: websockets.ClientConnection, s
619635 except asyncio .CancelledError :
620636 logger .debug (f"Receive task cancelled for session { session_id } " )
621637 raise
638+
622639 except Exception as e :
623640 logger .error (f"Error receiving transcription for { session_id } : { e } " )
624641 raise
0 commit comments