Skip to content

Commit 5cf5b4e

Browse files
committed
test: extend compat router tests from 17 to 25
New tests: Whisper text/verbose_json formats, translations lang forcing, Deepgram punctuate param, Google STT base64 WAV, AssemblyAI status field, Speechmatics 404 for unknown id, Speechmatics full POST→GET flow. AI-Generated Change: - Model: claude-sonnet-4-6 - Intent: validate response formats, job flow, and param handling - Impact: 17 → 25 tests - Verified via: uv run pytest test/ -v (25 passed)
1 parent 64b14aa commit 5cf5b4e

File tree

1 file changed

+125
-0
lines changed

1 file changed

+125
-0
lines changed

test/unittests/test_compat_routers.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,3 +262,128 @@ def test_get_transcript(self, client, wav_bytes):
262262
def test_get_missing_job_transcript(self, client):
    """Fetching a transcript for a job ID that was never created yields 404."""
    response = client.get("/speechmatics/v1/jobs/nonexistent/transcript")
    assert response.status_code == 404
265+
266+
267+
# ---------------------------------------------------------------------------
# Additional tests (8 new) covering edge cases
# ---------------------------------------------------------------------------
class TestWhisperResponseFormats:
    """Additional Whisper response_format edge-case tests."""

    def test_response_format_text_is_plain_text(self, client, wav_bytes):
        """response_format=text must return Content-Type text/plain, not JSON."""
        response = client.post(
            "/openai/v1/audio/transcriptions",
            data={"model": "whisper-1", "response_format": "text"},
            files={"file": ("audio.wav", wav_bytes, "audio/wav")},
        )
        assert response.status_code == 200
        # Body is the bare transcript string, not a JSON object.
        assert response.text.strip() == "hello world"
        assert "text/plain" in response.headers["content-type"]

    def test_response_format_verbose_json_has_segments_field(self, client, wav_bytes):
        """verbose_json response must contain a 'segments' key (may be empty list)."""
        response = client.post(
            "/openai/v1/audio/transcriptions",
            data={"model": "whisper-1", "response_format": "verbose_json"},
            files={"file": ("audio.wav", wav_bytes, "audio/wav")},
        )
        assert response.status_code == 200
        payload = response.json()
        assert "segments" in payload
        assert isinstance(payload["segments"], list)

    def test_translations_endpoint_forces_lang_en(self, client, wav_bytes):
        """Translations endpoint must set task=translate and language=en in verbose_json."""
        response = client.post(
            "/openai/v1/audio/translations",
            data={"model": "whisper-1", "response_format": "verbose_json"},
            files={"file": ("audio.wav", wav_bytes, "audio/wav")},
        )
        assert response.status_code == 200
        payload = response.json()
        assert payload["task"] == "translate"
        assert payload["language"] == "en"
309+
310+
311+
class TestDeepgramEdgeCases:
    """Additional Deepgram router edge-case tests."""

    def test_listen_with_punctuate_param_ignored(self, client, wav_bytes):
        """?punctuate=true is accepted and ignored; transcript is still returned."""
        response = client.post(
            "/deepgram/v1/listen?punctuate=true",
            headers={"Content-Type": "audio/wav"},
            content=wav_bytes,
        )
        assert response.status_code == 200
        # Drill into the first alternative of the first channel.
        first_alt = response.json()["results"]["channels"][0]["alternatives"][0]
        assert first_alt["transcript"] == "hello world"
324+
325+
326+
class TestGoogleSTTEdgeCases:
    """Additional Google STT router edge-case tests."""

    def test_recognize_with_base64_wav(self, client, wav_b64):
        """Explicit test that base64-encoded WAV bytes are decoded and transcribed."""
        request_body = {
            "config": {
                "encoding": "LINEAR16",
                "sampleRateHertz": 16000,
                "languageCode": "en-US",
            },
            "audio": {"content": wav_b64},
        }
        response = client.post("/google/v1/speech:recognize", json=request_body)
        assert response.status_code == 200
        top_alternative = response.json()["results"][0]["alternatives"][0]
        assert top_alternative["transcript"] == "hello world"
        assert top_alternative["confidence"] == pytest.approx(0.9, abs=0.01)
346+
347+
348+
class TestAssemblyAIEdgeCases:
    """Additional AssemblyAI router edge-case tests."""

    def test_get_transcript_always_has_status_field(self, client, wav_b64):
        """GET by any ID must always return a JSON body with a 'status' key."""
        created = client.post("/assemblyai/v2/transcript", json={"audio": wav_b64})
        transcript_id = created.json()["id"]

        fetched = client.get(f"/assemblyai/v2/transcript/{transcript_id}")
        assert fetched.status_code == 200
        assert "status" in fetched.json()
359+
360+
361+
class TestSpeechmaticsEdgeCases:
    """Additional Speechmatics router edge-case tests."""

    def test_get_unknown_job_id_returns_404(self, client):
        """A job ID that was never created must return HTTP 404."""
        response = client.get("/speechmatics/v1/jobs/totally-unknown-id-xyz/transcript")
        assert response.status_code == 404

    def test_get_known_job_id_returns_transcript(self, client, wav_bytes):
        """A job ID from a successful POST must return 200 with transcript text."""
        config_payload = json.dumps(
            {"type": "transcription", "transcription_config": {"language": "en"}}
        )
        created = client.post(
            "/speechmatics/v1/jobs",
            files={"data_file": ("audio.wav", wav_bytes, "audio/wav")},
            data={"config": config_payload},
        )
        assert created.status_code == 200
        job_id = created.json()["id"]

        response = client.get(f"/speechmatics/v1/jobs/{job_id}/transcript")
        assert response.status_code == 200
        payload = response.json()
        assert "results" in payload
        # Transcript content, when present, appears in the alternatives list.
        if payload["results"]:
            assert payload["results"][0]["alternatives"][0]["content"] == "hello world"

0 commit comments

Comments
 (0)