1212
1313Usage:
1414 Configure in vcon-server config.yml:
15-
15+
1616 links:
1717 speechmatics:
1818 module: speechmatics_vcon_link
5151 # Speechmatics API settings
5252 "api_key" : os .getenv ("SPEECHMATICS_API_KEY" ),
5353 "api_url" : "https://asr.api.speechmatics.com/v2" ,
54-
5554 # Output format options
5655 "save_native_format" : True ,
5756 "save_wtf_format" : True ,
58-
5957 # Transcription settings
6058 "model" : "enhanced" , # "standard" or "enhanced"
6159 "language" : None , # None for auto-detect
6260 "enable_diarization" : False ,
6361 "diarization_max_speakers" : None ,
64-
6562 # Polling settings
6663 "poll_interval" : 5 ,
6764 "max_poll_attempts" : 120 ,
68-
6965 # Processing behavior
7066 "skip_if_exists" : True ,
71-
7267 # Redis settings
7368 "redis_host" : "localhost" ,
7469 "redis_port" : 6379 ,
7873
def get_redis_connection(opts: dict):
    """Build a Redis client from the link options.

    Args:
        opts: Configuration options; the keys read are ``redis_host``,
            ``redis_port`` and ``redis_db``

    Returns:
        Redis connection object created with ``decode_responses=False``
    """
    # Fall back to a local, default-database instance for any missing key.
    connection_kwargs = {
        "host": opts.get("redis_host", "localhost"),
        "port": opts.get("redis_port", 6379),
        "db": opts.get("redis_db", 0),
    }
    return redis.Redis(decode_responses=False, **connection_kwargs)
9388
9489
9590def get_vcon_from_redis (redis_conn , vcon_uuid : str ) -> Optional [vcon .Vcon ]:
9691 """Retrieve a vCon from Redis.
97-
92+
9893 Args:
9994 redis_conn: Redis connection object
10095 vcon_uuid: UUID of the vCon to retrieve
101-
96+
10297 Returns:
10398 vCon object or None if not found
10499 """
@@ -114,11 +109,11 @@ def get_vcon_from_redis(redis_conn, vcon_uuid: str) -> Optional[vcon.Vcon]:
114109
115110def store_vcon_to_redis (redis_conn , vcon_obj : vcon .Vcon ) -> bool :
116111 """Store a vCon to Redis.
117-
112+
118113 Args:
119114 redis_conn: Redis connection object
120115 vcon_obj: vCon object to store
121-
116+
122117 Returns:
123118 True if successful, False otherwise
124119 """
@@ -134,186 +129,174 @@ def store_vcon_to_redis(redis_conn, vcon_obj: vcon.Vcon) -> bool:
134129
def has_transcription(vcon_obj: vcon.Vcon, dialog_index: int, analysis_type: str) -> bool:
    """Report whether a dialog already carries an analysis of the given type.

    Args:
        vcon_obj: The vCon object whose ``analysis`` entries are scanned
        dialog_index: Index of the dialog the analysis must reference
        analysis_type: Value the analysis ``type`` field must equal

    Returns:
        True if some analysis entry matches both the dialog index and the
        type, False otherwise
    """
    return any(
        entry.get("dialog") == dialog_index and entry.get("type") == analysis_type
        for entry in vcon_obj.analysis
    )
151145
152146
def get_audio_url(dialog: dict) -> Optional[str]:
    """Find the URL of a dialog's audio, if it has one.

    Args:
        dialog: Dialog dictionary from vCon

    Returns:
        The ``url`` value when it is non-empty; otherwise the ``filename``
        value when it begins with ``http://`` or ``https://``; otherwise None
    """
    explicit_url = dialog.get("url")
    if not explicit_url:
        # No usable URL field: a filename only counts if it is itself a URL.
        name = dialog.get("filename")
        looks_remote = bool(name) and name.startswith(("http://", "https://"))
        return name if looks_remote else None
    return explicit_url
173167
174168
def is_audio_dialog(dialog: dict) -> bool:
    """Check if a dialog contains audio content.

    A dialog counts as audio when its ``type`` is ``"recording"`` or
    ``"audio"``, or when its ``mimetype`` is a string starting with
    ``"audio/"``.

    Args:
        dialog: Dialog dictionary from vCon

    Returns:
        True if dialog is an audio recording, False otherwise
    """
    # Check type field
    if dialog.get("type", "") in ("recording", "audio"):
        return True

    # Check mimetype. The key can be present with a null (or otherwise
    # non-string) value, in which case .get()'s default is not used, so
    # guard before calling a str method instead of raising AttributeError.
    mimetype = dialog.get("mimetype")
    return isinstance(mimetype, str) and mimetype.startswith("audio/")
196190
197191
198- def run (
199- vcon_uuid : str ,
200- link_name : str ,
201- opts : dict = None
202- ) -> Optional [str ]:
192+ def run (vcon_uuid : str , link_name : str , opts : dict = None ) -> Optional [str ]:
203193 """Main link function - processes vCon through Speechmatics transcription.
204-
194+
205195 This function is called by the vCon server to process a vCon through this link.
206196 It iterates through audio dialogs, submits them to Speechmatics for transcription,
207197 and stores the results as analysis entries.
208-
198+
209199 Args:
210200 vcon_uuid: UUID of the vCon to process
211201 link_name: Name of this link instance (from config)
212202 opts: Configuration options for this link
213-
203+
214204 Returns:
215205 vcon_uuid (str) if processing should continue, None to stop the chain
216-
206+
217207 Raises:
218208 ValueError: If required configuration is missing
219209 SpeechmaticsError: If transcription fails
220210 """
221211 module_name = __name__ .split ("." )[- 1 ]
222212 logger .info (f"Starting { module_name } :{ link_name } plugin for: { vcon_uuid } " )
223-
213+
224214 # Merge provided options with defaults
225215 merged_opts = default_options .copy ()
226216 if opts :
227217 merged_opts .update (opts )
228218 opts = merged_opts
229-
219+
230220 # Validate required options
231221 api_key = opts .get ("api_key" )
232222 if not api_key :
233223 raise ValueError (
234224 "Speechmatics API key is required. "
235225 "Set SPEECHMATICS_API_KEY environment variable or provide api_key in options."
236226 )
237-
227+
238228 # Check that at least one output format is enabled
239229 if not opts .get ("save_native_format" ) and not opts .get ("save_wtf_format" ):
240230 logger .warning ("Neither save_native_format nor save_wtf_format is enabled. Enabling both." )
241231 opts ["save_native_format" ] = True
242232 opts ["save_wtf_format" ] = True
243-
233+
244234 # Get Redis connection
245235 redis_conn = get_redis_connection (opts )
246-
236+
247237 # Retrieve vCon from Redis
248238 vcon_obj = get_vcon_from_redis (redis_conn , vcon_uuid )
249239 if not vcon_obj :
250240 logger .error (f"vCon not found: { vcon_uuid } " )
251241 return None
252-
242+
253243 # Initialize Speechmatics client
254- client = SpeechmaticsClient (
255- api_key = api_key ,
256- api_url = opts .get ("api_url" )
257- )
258-
244+ client = SpeechmaticsClient (api_key = api_key , api_url = opts .get ("api_url" ))
245+
259246 # Build transcription config
260247 transcription_config = TranscriptionConfig (
261248 language = opts .get ("language" ),
262249 operating_point = opts .get ("model" , "enhanced" ),
263250 enable_diarization = opts .get ("enable_diarization" , False ),
264251 diarization_max_speakers = opts .get ("diarization_max_speakers" ),
265252 )
266-
253+
267254 # Track if we made any changes
268255 vcon_modified = False
269256 dialogs_processed = 0
270-
257+
271258 # Process each dialog
272259 for index , dialog in enumerate (vcon_obj .dialog ):
273260 # Skip non-audio dialogs
274261 if not is_audio_dialog (dialog ):
275262 logger .debug (f"Skipping dialog { index } : not an audio recording" )
276263 continue
277-
264+
278265 # Check if already transcribed
279266 if opts .get ("skip_if_exists" ):
280- if opts .get ("save_native_format" ) and has_transcription (
281- vcon_obj , index , "speechmatics_transcription"
282- ):
267+ if opts .get ("save_native_format" ) and has_transcription (vcon_obj , index , "speechmatics_transcription" ):
283268 logger .info (f"Dialog { index } already has native transcription, skipping" )
284269 continue
285-
286- if opts .get ("save_wtf_format" ) and has_transcription (
287- vcon_obj , index , "wtf_transcription"
288- ):
270+
271+ if opts .get ("save_wtf_format" ) and has_transcription (vcon_obj , index , "wtf_transcription" ):
289272 logger .info (f"Dialog { index } already has WTF transcription, skipping" )
290273 continue
291-
274+
292275 # Get audio URL
293276 audio_url = get_audio_url (dialog )
294277 if not audio_url :
295278 logger .warning (f"Dialog { index } has no audio URL, skipping" )
296279 continue
297-
280+
298281 logger .info (f"Processing dialog { index } : { audio_url } " )
299-
282+
300283 try :
301284 # Track processing time
302285 start_time = time .time ()
303-
286+
304287 # Submit and wait for transcription
305288 transcript_result = client .transcribe (
306289 audio_url = audio_url ,
307290 config = transcription_config ,
308291 poll_interval = opts .get ("poll_interval" , 5 ),
309292 max_attempts = opts .get ("max_poll_attempts" , 120 ),
310293 )
311-
294+
312295 processing_time = time .time () - start_time
313-
296+
314297 # Get dialog start time for metadata
315298 dialog_start = dialog .get ("start" )
316-
299+
317300 # Save native format
318301 if opts .get ("save_native_format" ):
319302 vcon_obj .add_analysis (
@@ -324,15 +307,15 @@ def run(
324307 encoding = "json" ,
325308 )
326309 logger .info (f"Added native transcription for dialog { index } " )
327-
310+
328311 # Save WTF format
329312 if opts .get ("save_wtf_format" ):
330313 wtf_result = convert_to_wtf (
331314 transcript_result ,
332315 created_at = dialog_start ,
333316 processing_time = processing_time ,
334317 )
335-
318+
336319 vcon_obj .add_analysis (
337320 type = "wtf_transcription" ,
338321 dialog = index ,
@@ -341,10 +324,10 @@ def run(
341324 encoding = "json" ,
342325 )
343326 logger .info (f"Added WTF transcription for dialog { index } " )
344-
327+
345328 vcon_modified = True
346329 dialogs_processed += 1
347-
330+
348331 except SpeechmaticsAuthError as e :
349332 logger .error (f"Authentication error for dialog { index } : { e } " )
350333 raise
@@ -357,7 +340,7 @@ def run(
357340 except Exception as e :
358341 logger .error (f"Unexpected error processing dialog { index } : { e } " , exc_info = True )
359342 raise
360-
343+
361344 # Store updated vCon back to Redis if modified
362345 if vcon_modified :
363346 if not store_vcon_to_redis (redis_conn , vcon_obj ):
@@ -366,7 +349,6 @@ def run(
366349 logger .info (f"Stored updated vCon with { dialogs_processed } transcriptions" )
367350 else :
368351 logger .info (f"No dialogs processed for vCon: { vcon_uuid } " )
369-
352+
370353 logger .info (f"Finished { module_name } :{ link_name } plugin for: { vcon_uuid } " )
371354 return vcon_uuid
372-
0 commit comments