Skip to content

Commit 80a0053

Browse files
authored
Refactoring (#94)
* Updated error responses
* Removed wav writer (unused)
* Added OpenAI diarization
1 parent c43aa28 commit 80a0053

27 files changed

+680
-182
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,17 @@ Available local models can be downloaded from [Hugging Face](https://huggingface
6767
| GGML Whisper `*.bin` ||| ||
6868
| GGML Whisper `ggml-small.en-tdrz.bin`[^1] || |||
6969
| OpenAI `whisper-1` [^2] ||| | |
70-
| OpenAI `gpt-4o-*-transcribe` [^4],[^5] || | ||
70+
| OpenAI `gpt-4o-*-transcribe` [^4],[^5] || | |[^6] |
71+
| OpenAI `gpt-4o-transcribe-diarize` [^7] || |||
7172
| ElevenLabs `scribe_v1`,`scribe_v2` [^3] || || |
7273

7374
[^1]: <https://huggingface.co/akashmjn/tinydiarize-whisper.cpp>
7475
[^2]: <https://platform.openai.com/docs/models/whisper-1>
7576
[^3]: <https://elevenlabs.io/docs/models#scribe-v1>
7677
[^4]: <https://platform.openai.com/docs/models/gpt-4o-transcribe>
7778
[^5]: <https://platform.openai.com/docs/models/gpt-4o-mini-transcribe>
79+
[^6]: Real-time streaming with `gpt-4o-*-transcribe` models emits text deltas only (no timestamps).
80+
[^7]: Real-time streaming with `gpt-4o-transcribe-diarize` emits complete segments with speaker labels and timestamps.
7881

7982
## Docker Deployment
8083

cmd/gowhisper/transcribe.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,14 @@ func (cmd *TranscribeCommand) Run(ctx *Globals) (err error) {
6666
return err
6767
}
6868

69-
// Add real-time segment printing callback
69+
// Add real-time segment printing callback with ID tracking
70+
var segmentId int32
7071
opts = append(opts, httpclient.WithSegmentCallback(func(seg *schema.Segment) error {
72+
// Use the running counter as the ID when the segment's ID is 0 (treated as unset); the counter advances for every segment
73+
if seg.Id == 0 {
74+
seg.Id = segmentId
75+
}
76+
segmentId++
7177
writeSegment(os.Stdout, seg, format)
7278
return nil
7379
}))

cmd/gowhisper/translate.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,14 @@ func (cmd *TranslateCommand) Run(ctx *Globals) (err error) {
6060
return err
6161
}
6262

63-
// Add real-time segment printing callback
63+
// Add real-time segment printing callback with ID tracking
64+
var segmentId int32
6465
opts = append(opts, httpclient.WithSegmentCallback(func(seg *schema.Segment) error {
66+
// Use the running counter as the ID when the segment's ID is 0 (treated as unset); the counter advances for every segment
67+
if seg.Id == 0 {
68+
seg.Id = segmentId
69+
}
70+
segmentId++
6571
writeSegment(os.Stdout, seg, format)
6672
return nil
6773
}))

go.mod

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,10 @@ toolchain go1.24.2
77
require (
88
github.com/alecthomas/kong v1.13.0
99
github.com/djthorpe/go-errors v1.0.3
10-
github.com/go-audio/audio v1.0.0
1110
github.com/go-audio/wav v1.1.0
1211
github.com/mutablelogic/go-client v1.3.5
1312
github.com/mutablelogic/go-media v1.8.3
1413
github.com/mutablelogic/go-server v1.5.18
15-
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e
1614
github.com/stretchr/testify v1.11.1
1715
go.opentelemetry.io/otel v1.39.0
1816
go.opentelemetry.io/otel/trace v1.39.0
@@ -25,6 +23,7 @@ require (
2523
github.com/cespare/xxhash/v2 v2.3.0 // indirect
2624
github.com/davecgh/go-spew v1.1.1 // indirect
2725
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect
26+
github.com/go-audio/audio v1.0.0 // indirect
2827
github.com/go-audio/riff v1.0.0 // indirect
2928
github.com/go-ldap/ldap/v3 v3.4.12 // indirect
3029
github.com/go-logr/logr v1.4.3 // indirect

go.sum

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,6 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
135135
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
136136
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
137137
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
138-
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e h1:s2RNOM/IGdY0Y6qfTeUKhDawdHDpK9RGBdx80qN4Ttw=
139-
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e/go.mod h1:nBdnFKj15wFbf94Rwfq4m30eAcyY9V/IyKAGQFtqkW0=
140138
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
141139
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
142140
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

pkg/elevenlabs/transcribe.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ func (c *Client) Transcribe(ctx context.Context, req TranscribeRequest) (*Transc
3434
}
3535

3636
// Create multipart request, and execute it
37-
if payload, err := client.NewMultipartRequest(req, client.ContentTypeAny); err != nil {
37+
if payload, err := client.NewStreamingMultipartRequest(req, client.ContentTypeAny); err != nil {
3838
return nil, err
3939
} else if err := c.DoWithContext(ctx, payload, &response, client.OptPath(TranscribePath)); err != nil {
4040
return nil, err

pkg/httpclient/transcribe.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,15 @@ func (c *Client) Transcribe(ctx context.Context, model string, r io.Reader, opts
5656
if opt.segmentCallback != nil {
5757
reqOpts = append(reqOpts, client.OptReqHeader("Accept", "text/event-stream"))
5858
reqOpts = append(reqOpts, client.OptTextStreamCallback(func(evt client.TextStreamEvent) error {
59-
var segment schema.Segment
60-
if evt.Event == schema.TranscribeStreamDeltaType {
59+
switch evt.Event {
60+
case schema.TranscribeStreamDeltaType:
61+
var segment schema.Segment
6162
if err := evt.Json(&segment); err != nil {
6263
return err
6364
}
6465
return opt.segmentCallback(&segment)
66+
case schema.TranscribeStreamErrorType:
67+
return fmt.Errorf("transcribe error: %s", evt.Data)
6568
}
6669
return nil
6770
}))

pkg/httpclient/translate.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,15 @@ func (c *Client) Translate(ctx context.Context, model string, r io.Reader, opts
6363
if opt.segmentCallback != nil {
6464
reqOpts = append(reqOpts, client.OptReqHeader("Accept", "text/event-stream"))
6565
reqOpts = append(reqOpts, client.OptTextStreamCallback(func(evt client.TextStreamEvent) error {
66-
var segment schema.Segment
67-
if evt.Event == schema.TranscribeStreamDeltaType {
66+
switch evt.Event {
67+
case schema.TranscribeStreamDeltaType:
68+
var segment schema.Segment
6869
if err := evt.Json(&segment); err != nil {
6970
return err
7071
}
7172
return opt.segmentCallback(&segment)
73+
case schema.TranscribeStreamErrorType:
74+
return fmt.Errorf("translate error: %s", evt.Data)
7275
}
7376
return nil
7477
}))

pkg/httphandler/httphandler.go

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@ import (
88
httpresponse "github.com/mutablelogic/go-server/pkg/httpresponse"
99
types "github.com/mutablelogic/go-server/pkg/types"
1010
pkg "github.com/mutablelogic/go-whisper/pkg"
11-
12-
// Namespace imports
13-
. "github.com/djthorpe/go-errors"
1411
)
1512

1613
///////////////////////////////////////////////////////////////////////////////
@@ -60,13 +57,6 @@ func httperr(err error) error {
6057
return err
6158
}
6259

63-
// Map pkg errors to HTTP errors
64-
switch {
65-
case errors.Is(err, ErrNotFound):
66-
return httpresponse.ErrNotFound.With(err.Error())
67-
case errors.Is(err, ErrBadParameter):
68-
return httpresponse.ErrBadRequest.With(err.Error())
69-
default:
70-
return httpresponse.ErrInternalError.With(err.Error())
71-
}
60+
// Map remaining errors to "Internal Error" HTTP errors
61+
return httpresponse.ErrInternalError.With(err.Error())
7262
}

pkg/httphandler/transcribe.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ func transcribeCreate(w http.ResponseWriter, r *http.Request, manager *pkg.Manag
4343
return httpresponse.Error(w, httpresponse.ErrBadRequest.With("missing or invalid audio field"))
4444
}
4545

46+
// Copy filename from multipart audio field if not explicitly set
47+
if req.Audio.Path != "" && (req.Filename == nil || *req.Filename == "") {
48+
req.Filename = &req.Audio.Path
49+
}
50+
4651
// Create text stream if requested
4752
var stream *httpresponse.TextStream
4853
var mimetype string

0 commit comments

Comments
 (0)