Skip to content

Commit cbc742a

Browse files
authored
Get audio stuff working. (#245)
* Initially getting things working.
* More closely match spec.
1 parent 7911291 commit cbc742a

File tree

5 files changed

+22
-23
lines changed

5 files changed

+22
-23
lines changed

speech.mp3

-460 KB
Binary file not shown.

src/together/abstract/api_requestor.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def parse_stream_helper(line: bytes) -> str | None:
7878
line = line[len(b"data: ") :]
7979
else:
8080
line = line[len(b"data:") :]
81-
if line.strip() == b"[DONE]":
81+
if line.strip().upper() == b"[DONE]":
8282
# return here will cause GeneratorExit exception in urllib3
8383
# and it will close http connection with TCP Reset
8484
return None
@@ -620,17 +620,22 @@ def _interpret_response(
620620
self, result: requests.Response, stream: bool
621621
) -> Tuple[TogetherResponse | Iterator[TogetherResponse], bool]:
622622
"""Returns the response(s) and a bool indicating whether it is a stream."""
623-
if stream and "text/event-stream" in result.headers.get("Content-Type", ""):
623+
content_type = result.headers.get("Content-Type", "")
624+
if stream and "text/event-stream" in content_type:
624625
return (
625626
self._interpret_response_line(
626627
line, result.status_code, result.headers, stream=True
627628
)
628629
for line in parse_stream(result.iter_lines())
629630
), True
630631
else:
632+
if content_type in ["application/octet-stream", "audio/wav", "audio/mpeg"]:
633+
content = result.content
634+
else:
635+
content = result.content.decode("utf-8")
631636
return (
632637
self._interpret_response_line(
633-
result.content.decode("utf-8"),
638+
content,
634639
result.status_code,
635640
result.headers,
636641
stream=False,
@@ -670,7 +675,7 @@ async def _interpret_async_response(
670675
)
671676

672677
def _interpret_response_line(
673-
self, rbody: str, rcode: int, rheaders: Any, stream: bool
678+
self, rbody: str | bytes, rcode: int, rheaders: Any, stream: bool
674679
) -> TogetherResponse:
675680
# HTTP 204 response code does not have any content in the body.
676681
if rcode == 204:
@@ -684,8 +689,11 @@ def _interpret_response_line(
684689
)
685690

686691
try:
687-
if "text/plain" in rheaders.get("Content-Type", ""):
692+
content_type = rheaders.get("Content-Type", "")
693+
if "text/plain" in content_type:
688694
data: Dict[str, Any] = {"message": rbody}
695+
elif content_type in ["application/octet-stream", "audio/wav", "audio/mpeg"]:
696+
data = rbody
689697
else:
690698
data = json.loads(rbody)
691699
except (JSONDecodeError, UnicodeDecodeError) as e:

src/together/resources/audio/speech.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,4 +150,4 @@ async def create(
150150
stream=stream,
151151
)
152152

153-
# return AudioSpeechStreamResponse(response=response)
153+
return AudioSpeechStreamResponse(response=response)

src/together/types/audio_speech.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,14 @@ class AudioSpeechStreamChunk(BaseModel):
6565
model: str
6666
b64: str
6767

68-
6968
class AudioSpeechStreamEvent(BaseModel):
7069
data: AudioSpeechStreamChunk
7170

72-
7371
class StreamSentinel(BaseModel):
7472
data: StreamSentinelType = StreamSentinelType.DONE
7573

74+
class AudioSpeechStreamEventResponse(BaseModel):
75+
response: AudioSpeechStreamEvent | StreamSentinel
7676

7777
class AudioSpeechStreamResponse(BaseModel):
7878

@@ -92,9 +92,13 @@ def stream_to_file(self, file_path: str) -> None:
9292
with open(file_path, "wb") as f:
9393
for chunk in self.response:
9494

95-
data = AudioSpeechStreamChunk(**chunk.data)
95+
# Try to parse as stream chunk
96+
stream_event_response = AudioSpeechStreamEventResponse(response={"data": chunk.data})
97+
98+
if isinstance(stream_event_response.response, StreamSentinel):
99+
break
96100

97101
# decode base64
98-
audio = base64.b64decode(data.b64)
102+
audio = base64.b64decode(stream_event_response.response.data.b64)
99103

100104
f.write(audio)

test.py

Lines changed: 0 additions & 13 deletions
This file was deleted.

0 commit comments

Comments
 (0)