Skip to content

Commit 515de02

Browse files
feat: initial TTS support (#528)
* feat: initial TTS support
* chore: lint, omitempty
* chore: don't use pointer in struct
* fix: add mocked server tests to speech_test.go

  Co-authored-by: Lachlan Laycock <[email protected]>
* chore: update imports
* chore: fix lint
* chore: add an error check
* chore: ignore lint
* chore: add error checks in package
* chore: add test
* chore: fix test

---------

Co-authored-by: Lachlan Laycock <[email protected]>
1 parent b7cac70 commit 515de02

File tree

3 files changed

+205
-0
lines changed

3 files changed

+205
-0
lines changed

client_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,9 @@ func TestClientReturnsRequestBuilderErrors(t *testing.T) {
358358
{"ListRunSteps", func() (any, error) {
359359
return client.ListRunSteps(ctx, "", "", Pagination{})
360360
}},
361+
{"CreateSpeech", func() (any, error) {
362+
return client.CreateSpeech(ctx, CreateSpeechRequest{Model: TTSModel1, Voice: VoiceAlloy})
363+
}},
361364
}
362365

363366
for _, testCase := range testCases {

speech.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
package openai
2+
3+
import (
4+
"context"
5+
"errors"
6+
"io"
7+
"net/http"
8+
)
9+
10+
// SpeechModel is the name of a text-to-speech model, sent as the "model"
// field of a CreateSpeechRequest.
type SpeechModel string

const (
	// TTSModel1 is the "tts-1" model.
	TTSModel1 SpeechModel = "tts-1"
	// TTSModel1HD is the "tts-1-hd" model.
	TTSModel1HD SpeechModel = "tts-1-hd"
	// TTsModel1HD is the original, mis-capitalised spelling of TTSModel1HD.
	// It is retained so existing callers keep compiling; new code should
	// prefer TTSModel1HD.
	TTsModel1HD = TTSModel1HD
)
16+
17+
// SpeechVoice is the name of a voice, sent as the "voice" field of a
// CreateSpeechRequest.
type SpeechVoice string

// Voices recognised by this package.
const (
	VoiceAlloy   SpeechVoice = "alloy"
	VoiceEcho    SpeechVoice = "echo"
	VoiceFable   SpeechVoice = "fable"
	VoiceOnyx    SpeechVoice = "onyx"
	VoiceNova    SpeechVoice = "nova"
	VoiceShimmer SpeechVoice = "shimmer"
)
27+
28+
// SpeechResponseFormat is the audio encoding requested for the generated
// speech, sent as the "response_format" field of a CreateSpeechRequest.
type SpeechResponseFormat string

// Audio formats that can be requested from the speech endpoint.
const (
	SpeechResponseFormatMp3  SpeechResponseFormat = "mp3"
	SpeechResponseFormatOpus SpeechResponseFormat = "opus"
	SpeechResponseFormatAac  SpeechResponseFormat = "aac"
	SpeechResponseFormatFlac SpeechResponseFormat = "flac"
	SpeechResponseFormatWav  SpeechResponseFormat = "wav"
	SpeechResponseFormatPcm  SpeechResponseFormat = "pcm"
)
36+
37+
// Sentinel errors returned by Client.CreateSpeech when the request fails
// local validation; compare with errors.Is.
var (
	// ErrInvalidSpeechModel reports a request whose Model is not one of the
	// speech models this package recognises.
	ErrInvalidSpeechModel = errors.New("invalid speech model")
	// ErrInvalidVoice reports a request whose Voice is not one of the
	// voices this package recognises.
	ErrInvalidVoice = errors.New("invalid voice")
)
41+
42+
type CreateSpeechRequest struct {
43+
Model SpeechModel `json:"model"`
44+
Input string `json:"input"`
45+
Voice SpeechVoice `json:"voice"`
46+
ResponseFormat SpeechResponseFormat `json:"response_format,omitempty"` // Optional, default to mp3
47+
Speed float64 `json:"speed,omitempty"` // Optional, default to 1.0
48+
}
49+
50+
// contains reports whether e is equal to at least one element of s.
// It returns false for a nil or empty slice.
func contains[T comparable](s []T, e T) bool {
	for i := range s {
		if s[i] == e {
			return true
		}
	}
	return false
}
58+
59+
func isValidSpeechModel(model SpeechModel) bool {
60+
return contains([]SpeechModel{TTSModel1, TTsModel1HD}, model)
61+
}
62+
63+
func isValidVoice(voice SpeechVoice) bool {
64+
return contains([]SpeechVoice{VoiceAlloy, VoiceEcho, VoiceFable, VoiceOnyx, VoiceNova, VoiceShimmer}, voice)
65+
}
66+
67+
func (c *Client) CreateSpeech(ctx context.Context, request CreateSpeechRequest) (response io.ReadCloser, err error) {
68+
if !isValidSpeechModel(request.Model) {
69+
err = ErrInvalidSpeechModel
70+
return
71+
}
72+
if !isValidVoice(request.Voice) {
73+
err = ErrInvalidVoice
74+
return
75+
}
76+
req, err := c.newRequest(ctx, http.MethodPost, c.fullURL("/audio/speech", request.Model),
77+
withBody(request),
78+
withContentType("application/json; charset=utf-8"),
79+
)
80+
if err != nil {
81+
return
82+
}
83+
84+
response, err = c.sendRequestRaw(req)
85+
86+
return
87+
}

speech_test.go

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
package openai_test
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"mime"
9+
"net/http"
10+
"os"
11+
"path/filepath"
12+
"testing"
13+
14+
"github.com/sashabaranov/go-openai"
15+
"github.com/sashabaranov/go-openai/internal/test"
16+
"github.com/sashabaranov/go-openai/internal/test/checks"
17+
)
18+
19+
func TestSpeechIntegration(t *testing.T) {
20+
client, server, teardown := setupOpenAITestServer()
21+
defer teardown()
22+
23+
server.RegisterHandler("/v1/audio/speech", func(w http.ResponseWriter, r *http.Request) {
24+
dir, cleanup := test.CreateTestDirectory(t)
25+
path := filepath.Join(dir, "fake.mp3")
26+
test.CreateTestFile(t, path)
27+
defer cleanup()
28+
29+
// audio endpoints only accept POST requests
30+
if r.Method != "POST" {
31+
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
32+
return
33+
}
34+
35+
mediaType, _, err := mime.ParseMediaType(r.Header.Get("Content-Type"))
36+
if err != nil {
37+
http.Error(w, "failed to parse media type", http.StatusBadRequest)
38+
return
39+
}
40+
41+
if mediaType != "application/json" {
42+
http.Error(w, "request is not json", http.StatusBadRequest)
43+
return
44+
}
45+
46+
// Parse the JSON body of the request
47+
var params map[string]interface{}
48+
err = json.NewDecoder(r.Body).Decode(&params)
49+
if err != nil {
50+
http.Error(w, "failed to parse request body", http.StatusBadRequest)
51+
return
52+
}
53+
54+
// Check if each required field is present in the parsed JSON object
55+
reqParams := []string{"model", "input", "voice"}
56+
for _, param := range reqParams {
57+
_, ok := params[param]
58+
if !ok {
59+
http.Error(w, fmt.Sprintf("no %s in params", param), http.StatusBadRequest)
60+
return
61+
}
62+
}
63+
64+
// read audio file content
65+
audioFile, err := os.ReadFile(path)
66+
if err != nil {
67+
http.Error(w, "failed to read audio file", http.StatusInternalServerError)
68+
return
69+
}
70+
71+
// write audio file content to response
72+
w.Header().Set("Content-Type", "audio/mpeg")
73+
w.Header().Set("Transfer-Encoding", "chunked")
74+
w.Header().Set("Connection", "keep-alive")
75+
_, err = w.Write(audioFile)
76+
if err != nil {
77+
http.Error(w, "failed to write body", http.StatusInternalServerError)
78+
return
79+
}
80+
})
81+
82+
t.Run("happy path", func(t *testing.T) {
83+
res, err := client.CreateSpeech(context.Background(), openai.CreateSpeechRequest{
84+
Model: openai.TTSModel1,
85+
Input: "Hello!",
86+
Voice: openai.VoiceAlloy,
87+
})
88+
checks.NoError(t, err, "CreateSpeech error")
89+
defer res.Close()
90+
91+
buf, err := io.ReadAll(res)
92+
checks.NoError(t, err, "ReadAll error")
93+
94+
// save buf to file as mp3
95+
err = os.WriteFile("test.mp3", buf, 0644)
96+
checks.NoError(t, err, "Create error")
97+
})
98+
t.Run("invalid model", func(t *testing.T) {
99+
_, err := client.CreateSpeech(context.Background(), openai.CreateSpeechRequest{
100+
Model: "invalid_model",
101+
Input: "Hello!",
102+
Voice: openai.VoiceAlloy,
103+
})
104+
checks.ErrorIs(t, err, openai.ErrInvalidSpeechModel, "CreateSpeech error")
105+
})
106+
107+
t.Run("invalid voice", func(t *testing.T) {
108+
_, err := client.CreateSpeech(context.Background(), openai.CreateSpeechRequest{
109+
Model: openai.TTSModel1,
110+
Input: "Hello!",
111+
Voice: "invalid_voice",
112+
})
113+
checks.ErrorIs(t, err, openai.ErrInvalidVoice, "CreateSpeech error")
114+
})
115+
}

0 commit comments

Comments
 (0)