@@ -262,3 +262,128 @@ def test_get_transcript(self, client, wav_bytes):
def test_get_missing_job_transcript(self, client):
    """Requesting the transcript of the job ID ``nonexistent`` yields HTTP 404."""
    missing_job_url = "/speechmatics/v1/jobs/nonexistent/transcript"
    assert client.get(missing_job_url).status_code == 404
265+
266+
267+ # ---------------------------------------------------------------------------
268+ # Additional tests (8 new) covering edge cases
269+ # ---------------------------------------------------------------------------
270+
class TestWhisperResponseFormats:
    """Edge-case tests for ``response_format`` on the Whisper-style audio routes."""

    @staticmethod
    def _post_audio(client, wav_bytes, route, response_format):
        """POST the WAV fixture to *route* as a multipart upload.

        The form always names the ``whisper-1`` model; only the route and the
        requested ``response_format`` vary between the tests in this class.
        Returns the response object produced by ``client.post``.
        """
        upload = {"file": ("audio.wav", wav_bytes, "audio/wav")}
        form = {"model": "whisper-1", "response_format": response_format}
        return client.post(route, files=upload, data=form)

    def test_response_format_text_is_plain_text(self, client, wav_bytes):
        """``response_format=text`` must come back as ``text/plain``, not JSON."""
        reply = self._post_audio(
            client, wav_bytes, "/openai/v1/audio/transcriptions", "text"
        )
        assert reply.status_code == 200
        # The body is the bare transcript, not a JSON object wrapping it.
        assert reply.text.strip() == "hello world"
        assert "text/plain" in reply.headers["content-type"]

    def test_response_format_verbose_json_has_segments_field(self, client, wav_bytes):
        """A ``verbose_json`` body must carry a ``segments`` list (possibly empty)."""
        reply = self._post_audio(
            client, wav_bytes, "/openai/v1/audio/transcriptions", "verbose_json"
        )
        assert reply.status_code == 200
        payload = reply.json()
        assert "segments" in payload
        assert isinstance(payload["segments"], list)

    def test_translations_endpoint_forces_lang_en(self, client, wav_bytes):
        """The translations route must report ``task=translate`` and ``language=en``."""
        reply = self._post_audio(
            client, wav_bytes, "/openai/v1/audio/translations", "verbose_json"
        )
        assert reply.status_code == 200
        payload = reply.json()
        assert payload["task"] == "translate"
        assert payload["language"] == "en"
309+
310+
class TestDeepgramEdgeCases:
    """Edge-case tests for the Deepgram-style ``/listen`` route."""

    def test_listen_with_punctuate_param_ignored(self, client, wav_bytes):
        """With ``?punctuate=true`` the request still succeeds and returns the transcript."""
        listen_url = "/deepgram/v1/listen?punctuate=true"
        audio_headers = {"Content-Type": "audio/wav"}

        # The audio is sent as the raw request body, not as a multipart upload.
        reply = client.post(listen_url, content=wav_bytes, headers=audio_headers)

        assert reply.status_code == 200
        first_channel = reply.json()["results"]["channels"][0]
        best_alternative = first_channel["alternatives"][0]
        assert best_alternative["transcript"] == "hello world"
324+
325+
class TestGoogleSTTEdgeCases:
    """Edge-case tests for the Google STT-style ``speech:recognize`` route."""

    def test_recognize_with_base64_wav(self, client, wav_b64):
        """Base64-encoded WAV content is accepted and transcribed."""
        recognition_config = {
            "encoding": "LINEAR16",
            "sampleRateHertz": 16000,
            "languageCode": "en-US",
        }
        request_body = {
            "config": recognition_config,
            "audio": {"content": wav_b64},
        }

        reply = client.post("/google/v1/speech:recognize", json=request_body)

        assert reply.status_code == 200
        top_alternative = reply.json()["results"][0]["alternatives"][0]
        assert top_alternative["transcript"] == "hello world"
        # Confidence is compared with a tolerance: 0.9 +/- 0.01.
        assert top_alternative["confidence"] == pytest.approx(0.9, abs=0.01)
346+
347+
class TestAssemblyAIEdgeCases:
    """Edge-case tests for the AssemblyAI-style ``/v2/transcript`` routes."""

    def test_get_transcript_always_has_status_field(self, client, wav_b64):
        """GET for a freshly created transcript returns JSON with a ``status`` key.

        The transcript is created first via POST; the ID from that response is
        then used for the GET, so only an existing ID is exercised here.
        """
        create = client.post("/assemblyai/v2/transcript", json={"audio": wav_b64})
        tid = create.json()["id"]

        # No whitespace around the replacement field: anything outside the
        # braces is literal text and would become part of the request path.
        get_resp = client.get(f"/assemblyai/v2/transcript/{tid}")

        assert get_resp.status_code == 200
        body = get_resp.json()
        assert "status" in body
359+
360+
class TestSpeechmaticsEdgeCases:
    """Edge-case tests for the Speechmatics-style ``/v1/jobs`` routes."""

    def test_get_unknown_job_id_returns_404(self, client):
        """A job ID that was never created must return HTTP 404."""
        resp = client.get("/speechmatics/v1/jobs/totally-unknown-id-xyz/transcript")
        assert resp.status_code == 404

    def test_get_known_job_id_returns_transcript(self, client, wav_bytes):
        """A job ID from a successful POST must return 200 with a ``results`` key."""
        # The job config travels as a JSON string in the ``config`` form field,
        # next to the multipart ``data_file`` upload.
        job_config = json.dumps(
            {"type": "transcription", "transcription_config": {"language": "en"}}
        )
        create = client.post(
            "/speechmatics/v1/jobs",
            files={"data_file": ("audio.wav", wav_bytes, "audio/wav")},
            data={"config": job_config},
        )
        assert create.status_code == 200
        job_id = create.json()["id"]

        # No whitespace around the replacement field: a space before
        # ``/transcript`` would become part of the request path.
        resp = client.get(f"/speechmatics/v1/jobs/{job_id}/transcript")
        assert resp.status_code == 200
        body = resp.json()
        assert "results" in body
        # Transcript content should appear in alternatives.
        # NOTE(review): this check is skipped when ``results`` is empty, so an
        # empty result list still passes -- confirm that is intended.
        if body["results"]:
            assert body["results"][0]["alternatives"][0]["content"] == "hello world"
0 commit comments