-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Closed
Labels
api: speech — Issues related to the Speech-to-Text API. type: question — Request for information or clarification. Not an issue.
Description
Client
cloud.google.com/go/speech/apiv2/speech.Client
Environment
go1.25.5 darwin/arm64
Code and Dependencies
func (t *GoogleTranscriberV2) TranscribeStream(ctx context.Context, params *transcriber.TranscribeParams) (*transcriber.TranscribeResults, error) {
start := time.Now()
log.Infof("GoogleTranscriberV2.TranscribeStream Begin. params:%+v", params)
stream, err := t.client.StreamingRecognize(ctx)
log.Infof("GoogleTranscriberV2.TranscribeStream Connect. err:%v", err)
if err != nil {
return nil, err
}
// Send the initial configuration message.
configReq := &speechpb.StreamingRecognizeRequest{
Recognizer: t.getRecognizer(params.Locale),
StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
StreamingConfig: &speechpb.StreamingRecognitionConfig{
Config: &speechpb.RecognitionConfig{
// TODO: Required if using headerless PCM audio (linear16, mulaw, alaw).
DecodingConfig: &speechpb.RecognitionConfig_ExplicitDecodingConfig{
ExplicitDecodingConfig: &speechpb.ExplicitDecodingConfig{
Encoding: convertFormat(params.Format),
SampleRateHertz: params.SampleRate,
AudioChannelCount: 1,
},
},
},
// TODO: Adaptation
StreamingFeatures: &speechpb.StreamingRecognitionFeatures{
// TODO: Even if set to true, still didn't receive intermediate results!
InterimResults: true,
},
},
},
}
err = stream.Send(configReq)
log.Infof("GoogleTranscriberV2.TranscribeStream SendConfig. req:%+v, err:%v", configReq, err)
if err != nil {
return nil, err
}
// sender goroutine
senderErrChan := make(chan error, 1)
go func() {
log.Infof("GoogleTranscriberV2.TranscribeStream Sender Start.")
sendCount := 0
size := 1024 * 8
buf := make([]byte, size)
for {
select {
case <-ctx.Done():
log.Infof("GoogleTranscriberV2.TranscribeStream Sender ContextCancelled. err:%v", ctx.Err())
senderErrChan <- ctx.Err()
return
default:
}
sendCount++
n, err := params.Audio.Read(buf)
log.Infof("GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:%d, n:%d, err:%v", sendCount, n, err)
if n > 0 {
err := stream.Send(&speechpb.StreamingRecognizeRequest{
StreamingRequest: &speechpb.StreamingRecognizeRequest_Audio{
Audio: buf[:n],
},
})
log.Infof("GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:%d, n:%d, err:%v", sendCount, n, err)
if err != nil {
log.Errorf("GoogleTranscriberV2.TranscribeStream Sender SendAudioFailed. sendCount:%d, n:%d, err:%v", sendCount, n, err)
senderErrChan <- err
return
}
}
if err == io.EOF {
e := stream.CloseSend() // TODO:
log.Infof("GoogleTranscriberV2.TranscribeStream Sender CloseSend. err:%v", e)
if e != nil {
log.Errorf("GoogleTranscriberV2.TranscribeStream Sender CloseSendFailed. err:%v", e)
senderErrChan <- e
return
}
log.Infof("GoogleTranscriberV2.TranscribeStream Sender ReadAudioOver. sendCount:%d", sendCount)
return
}
if err != nil {
log.Errorf("GoogleTranscriberV2.TranscribeStream Sender ReadAudioFailed. sendCount:%d, err:%v", sendCount, err)
senderErrChan <- err
continue
}
}
}()
// receiver goroutine
receiverChan := make(chan *ReceiverResult, 1)
go func() {
log.Infof("GoogleTranscriberV2.TranscribeStream Receiver Start.")
var (
receiveCount int
lastInterimResultTime time.Time
finalText string
isComplete bool
receiveErr error
)
for {
select {
case err := <-senderErrChan:
if err != nil {
log.Errorf("GoogleTranscriberV2.TranscribeStream Receiver SenderFailed. err:%v", err)
receiveErr = err
}
default:
}
if receiveErr != nil {
break
}
select {
case <-ctx.Done():
log.Errorf("GoogleTranscriberV2.TranscribeStream Receiver ContextCancelled. err:%v", ctx.Err())
receiveErr = ctx.Err()
default:
}
if receiveErr != nil {
break
}
receiveCount++
resp, err := stream.Recv()
log.Infof("GoogleTranscriberV2.TranscribeStream Receiver Recv. receiveCount:%d, resp:%+v, err:%v", receiveCount, resp, err)
if err == io.EOF {
log.Errorf("GoogleTranscriberV2.TranscribeStream Receiver RecvEOF. receiveCount:%d", receiveCount)
break
}
if err != nil {
log.Errorf("GoogleTranscriberV2.TranscribeStream Receiver RecvFailed. receiveCount:%d, err:%v", receiveCount, err)
receiveErr = err
break
}
resultCount := 0
for _, result := range resp.GetResults() {
resultCount++
transcript := ""
if len(result.GetAlternatives()) > 0 {
alternative := result.GetAlternatives()[0]
if alternative != nil {
transcript = alternative.GetTranscript()
}
}
log.Infof("GoogleTranscriberV2.TranscribeStream Receiver RecvResult. receiveCount:%d, resultCount:%d, isFinal:%v, transcript:%+v",
receiveCount, resultCount, result.GetIsFinal(), transcript)
if !result.GetIsFinal() {
// interim result
if params.OnResult != nil {
if time.Since(lastInterimResultTime) > intermediateInterval {
params.OnResult(transcript, false)
lastInterimResultTime = time.Now()
}
}
} else {
// final result
finalText = transcript
if params.OnResult != nil {
params.OnResult(finalText, true)
}
isComplete = true
break
}
}
if isComplete {
break
}
}
log.Infof("GoogleTranscriberV2.TranscribeStream Receiver Over. finalText:%s, isComplete:%v, recvErr:%v", finalText, isComplete, receiveErr)
receiverChan <- &ReceiverResult{
Text: finalText,
IsComplete: isComplete,
Err: receiveErr,
}
}()
result := <-receiverChan
log.Infof("GoogleTranscriberV2.TranscribeStream Result. result:%+v", result)
text := ""
if result.Err == nil && result.IsComplete {
text = result.Text
}
duration := time.Since(start)
results := &transcriber.TranscribeResults{
Text: text,
Duration: duration,
Cost: 0, // TODO:
}
log.Infof("GoogleTranscriberV2.TranscribeStream End. results:%+v", results)
return results, nil
}go.mod
module myai
go 1.25.5
require (
cloud.google.com/go/speech v1.28.1
cloud.google.com/go/texttospeech v1.16.0
)
Expected behavior
While audio data is still being sent, intermediate (interim) transcription results should be received.
Actual behavior
Only the final result was received; no intermediate results arrived.
Logs
2026-01-06T20:42:45.805+0800 INFO GoogleTranscriberV2.TranscribeStream Begin. params:&{Audio:0x14000180cc0 Format:pcm SampleRate:16000 Locale:zh OnResult:0x1043659e0 PhraseHints:[5] CustomClasses:map[게임용어:[레벨업 경험치 퀘스트 던전 보스 아이템] 도메인특화:[구독 프리미엄 Soul 캐릭터 대화] 캐릭터별호칭:[여보 자기야 달링 허니 베이비]]}
2026-01-06T20:42:45.809+0800 INFO GoogleTranscriberV2.TranscribeStream Connect. err:<nil>
2026-01-06T20:42:45.811+0800 INFO GoogleTranscriberV2.TranscribeStream SendConfig. req:recognizer:"projects/ucloudlink-411610/locations/asia-northeast1/recognizers/zh" streaming_config:{config:{explicit_decoding_config:{encoding:LINEAR16 sample_rate_hertz:16000 audio_channel_count:1}} streaming_features:{interim_results:true}}, err:<nil>
2026-01-06T20:42:45.811+0800 INFO GoogleTranscriberV2.TranscribeStream Receiver Start.
2026-01-06T20:42:45.811+0800 INFO GoogleTranscriberV2.TranscribeStream Sender Start.
2026-01-06T20:42:45.811+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:1, n:8192, err:<nil>
2026-01-06T20:42:45.811+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:1, n:8192, err:<nil>
2026-01-06T20:42:45.954+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:2, n:8192, err:<nil>
2026-01-06T20:42:45.954+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:2, n:8192, err:<nil>
2026-01-06T20:42:46.105+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:3, n:8192, err:<nil>
2026-01-06T20:42:46.105+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:3, n:8192, err:<nil>
2026-01-06T20:42:46.258+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:4, n:8192, err:<nil>
2026-01-06T20:42:46.259+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:4, n:8192, err:<nil>
2026-01-06T20:42:46.409+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:5, n:8192, err:<nil>
2026-01-06T20:42:46.409+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:5, n:8192, err:<nil>
2026-01-06T20:42:46.560+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:6, n:8192, err:<nil>
2026-01-06T20:42:46.560+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:6, n:8192, err:<nil>
2026-01-06T20:42:46.713+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:7, n:8192, err:<nil>
2026-01-06T20:42:46.714+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:7, n:8192, err:<nil>
2026-01-06T20:42:46.863+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:8, n:8192, err:<nil>
2026-01-06T20:42:46.863+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:8, n:8192, err:<nil>
2026-01-06T20:42:47.018+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:9, n:8192, err:<nil>
2026-01-06T20:42:47.019+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:9, n:8192, err:<nil>
2026-01-06T20:42:47.177+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:10, n:8192, err:<nil>
2026-01-06T20:42:47.177+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:10, n:8192, err:<nil>
2026-01-06T20:42:47.330+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:11, n:8192, err:<nil>
2026-01-06T20:42:47.330+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:11, n:8192, err:<nil>
2026-01-06T20:42:47.496+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:12, n:8192, err:<nil>
2026-01-06T20:42:47.496+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:12, n:8192, err:<nil>
2026-01-06T20:42:47.631+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:13, n:8192, err:<nil>
2026-01-06T20:42:47.631+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:13, n:8192, err:<nil>
2026-01-06T20:42:47.782+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:14, n:8192, err:<nil>
2026-01-06T20:42:47.782+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:14, n:8192, err:<nil>
2026-01-06T20:42:47.934+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:15, n:8192, err:<nil>
2026-01-06T20:42:47.935+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:15, n:8192, err:<nil>
2026-01-06T20:42:48.085+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:16, n:8192, err:<nil>
2026-01-06T20:42:48.085+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:16, n:8192, err:<nil>
2026-01-06T20:42:48.236+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:17, n:8192, err:<nil>
2026-01-06T20:42:48.236+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:17, n:8192, err:<nil>
2026-01-06T20:42:48.389+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:18, n:6744, err:<nil>
2026-01-06T20:42:48.389+0800 INFO GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:18, n:6744, err:<nil>
2026-01-06T20:42:48.540+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:19, n:0, err:<nil>
2026-01-06T20:42:48.540+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:20, n:0, err:EOF
2026-01-06T20:42:48.540+0800 INFO GoogleTranscriberV2.TranscribeStream Sender CloseSend. err:<nil>
2026-01-06T20:42:48.540+0800 INFO GoogleTranscriberV2.TranscribeStream Sender ReadAudioOver. sendCount:20
2026-01-06T20:42:51.439+0800 INFO GoogleTranscriberV2.TranscribeStream Receiver Recv. receiveCount:1, resp:results:{alternatives:{transcript:"今年的最后一晚了,宝贝,要不要和我一起好好庆祝一下?"} is_final:true result_end_offset:{seconds:4 nanos:562749862} language_code:"cmn-Hans-CN"} metadata:{request_id:"69774bf6-0000-21e4-8aec-9898fbcda16d" total_billed_duration:{seconds:5}}, err:<nil>
2026-01-06T20:42:51.439+0800 INFO GoogleTranscriberV2.TranscribeStream Receiver RecvResult. receiveCount:1, resultCount:1, isFinal:true, transcript:今年的最后一晚了,宝贝,要不要和我一起好好庆祝一下?
2026-01-06T20:42:51.440+0800 INFO GoogleTranscriberV2.TranscribeStream Receiver Over. finalText:今年的最后一晚了,宝贝,要不要和我一起好好庆祝一下?, isComplete:true, recvErr:<nil>
2026-01-06T20:42:51.440+0800 INFO GoogleTranscriberV2.TranscribeStream Result. result:&{Text:今年的最后一晚了,宝贝,要不要和我一起好好庆祝一下? IsComplete:true Err:<nil>}
2026-01-06T20:42:51.440+0800 INFO GoogleTranscriberV2.TranscribeStream End. results:&{Text:今年的最后一晚了,宝贝,要不要和我一起好好庆祝一下? Duration:5.634542958s Cost:0}
Metadata
Metadata
Assignees
Labels
api: speech — Issues related to the Speech-to-Text API. type: question — Request for information or clarification. Not an issue.