Skip to content

cloud.google.com/go/speech/apiv2: StreamingFeatures.InterimResults not working #13543

@furucong

Description

@furucong

Client

cloud.google.com/go/speech/apiv2/speech.Client

Environment

go1.25.5 darwin/arm64

Code and Dependencies

// TranscribeStream streams params.Audio to the Speech-to-Text v2
// StreamingRecognize API and returns the transcript of the first final result.
//
// The audio is read in 8 KiB chunks by a background sender goroutine while the
// calling goroutine receives responses. If params.OnResult is non-nil it is
// invoked with (transcript, false) for interim results, at most once per
// intermediateInterval, and with (transcript, true) for the final result.
//
// It returns an error when the stream cannot be opened or configured, when the
// context is cancelled, or when sending or receiving fails. If the server
// closes the stream without ever producing a final result, the returned
// TranscribeResults has an empty Text and the error is nil.
//
// NOTE(review): reception stops at the first result with is_final set. Audio
// containing several utterances may produce further final results, which are
// not read here; confirm this is intended.
func (t *GoogleTranscriberV2) TranscribeStream(ctx context.Context, params *transcriber.TranscribeParams) (*transcriber.TranscribeResults, error) {
	start := time.Now()
	log.Infof("GoogleTranscriberV2.TranscribeStream Begin. params:%+v", params)

	// Tie the stream and the sender goroutine to this call: cancelling on every
	// return path tears the gRPC stream down instead of leaking it when the
	// receiver finishes before the sender does.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	stream, err := t.client.StreamingRecognize(ctx)
	log.Infof("GoogleTranscriberV2.TranscribeStream Connect. err:%v", err)
	if err != nil {
		return nil, err
	}

	// The first message on the stream must carry the configuration.
	configReq := &speechpb.StreamingRecognizeRequest{
		Recognizer: t.getRecognizer(params.Locale),
		StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
			StreamingConfig: &speechpb.StreamingRecognitionConfig{
				Config: &speechpb.RecognitionConfig{
					// TODO: Required if using headerless PCM audio (linear16, mulaw, alaw).
					DecodingConfig: &speechpb.RecognitionConfig_ExplicitDecodingConfig{
						ExplicitDecodingConfig: &speechpb.ExplicitDecodingConfig{
							Encoding:          convertFormat(params.Format),
							SampleRateHertz:   params.SampleRate,
							AudioChannelCount: 1,
						},
					},
				},
				// TODO: Adaptation
				StreamingFeatures: &speechpb.StreamingRecognitionFeatures{
					// TODO: Even if set to true, still didn't receive intermediate results!
					// NOTE(review): Config sets neither a model nor language codes, so the
					// recognizer's own defaults apply. Whether that default model emits
					// interim results cannot be determined from this code; verify against
					// the recognizer resource.
					InterimResults: true,
				},
			},
		},
	}
	err = stream.Send(configReq)
	log.Infof("GoogleTranscriberV2.TranscribeStream SendConfig. req:%+v, err:%v", configReq, err)
	if err != nil {
		return nil, err
	}

	// Sender goroutine. It writes to senderErrChan at most once and then
	// returns, so the one-slot buffer guarantees that write never blocks, even
	// after this function has already returned.
	senderErrChan := make(chan error, 1)
	go func() {
		log.Infof("GoogleTranscriberV2.TranscribeStream Sender Start.")

		// fail publishes the error first and cancels second, so that a receiver
		// woken up by the cancellation is guaranteed to find the root cause.
		fail := func(cause error) {
			senderErrChan <- cause
			cancel()
		}

		sendCount := 0
		buf := make([]byte, 1024*8)

		for {
			select {
			case <-ctx.Done():
				log.Infof("GoogleTranscriberV2.TranscribeStream Sender ContextCancelled. err:%v", ctx.Err())
				senderErrChan <- ctx.Err()
				return
			default:
			}

			sendCount++
			n, readErr := params.Audio.Read(buf)
			log.Infof("GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:%d, n:%d, err:%v", sendCount, n, readErr)
			if n > 0 {
				// Send a private copy: gRPC documents that a message must not be
				// modified after it was passed to Send, and buf is overwritten by
				// the next Read.
				chunk := append([]byte(nil), buf[:n]...)
				sendErr := stream.Send(&speechpb.StreamingRecognizeRequest{
					StreamingRequest: &speechpb.StreamingRecognizeRequest_Audio{
						Audio: chunk,
					},
				})
				log.Infof("GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:%d, n:%d, err:%v", sendCount, n, sendErr)
				if sendErr != nil {
					log.Errorf("GoogleTranscriberV2.TranscribeStream Sender SendAudioFailed. sendCount:%d, n:%d, err:%v", sendCount, n, sendErr)
					fail(sendErr)
					return
				}
			}
			if readErr == io.EOF {
				// End of audio: half-close so the server knows no more data follows.
				closeErr := stream.CloseSend() // TODO:
				log.Infof("GoogleTranscriberV2.TranscribeStream Sender CloseSend. err:%v", closeErr)
				if closeErr != nil {
					log.Errorf("GoogleTranscriberV2.TranscribeStream Sender CloseSendFailed. err:%v", closeErr)
					fail(closeErr)
					return
				}
				log.Infof("GoogleTranscriberV2.TranscribeStream Sender ReadAudioOver. sendCount:%d", sendCount)
				return
			}
			if readErr != nil {
				// A failed read is terminal. Retrying could spin forever and would
				// block on the already-full error channel.
				log.Errorf("GoogleTranscriberV2.TranscribeStream Sender ReadAudioFailed. sendCount:%d, err:%v", sendCount, readErr)
				fail(readErr)
				return
			}
		}
	}()

	// Receive loop, run on the calling goroutine.
	log.Infof("GoogleTranscriberV2.TranscribeStream Receiver Start.")
	var (
		receiveCount          int
		lastInterimResultTime time.Time
		finalText             string
		isComplete            bool
		receiveErr            error
	)
recvLoop:
	for {
		// Stop early if the sender has already reported a failure.
		select {
		case sendErr := <-senderErrChan:
			if sendErr != nil {
				log.Errorf("GoogleTranscriberV2.TranscribeStream Receiver SenderFailed. err:%v", sendErr)
				receiveErr = sendErr
				break recvLoop
			}
		default:
		}

		select {
		case <-ctx.Done():
			log.Errorf("GoogleTranscriberV2.TranscribeStream Receiver ContextCancelled. err:%v", ctx.Err())
			receiveErr = ctx.Err()
			break recvLoop
		default:
		}

		receiveCount++
		resp, recvErr := stream.Recv()
		log.Infof("GoogleTranscriberV2.TranscribeStream Receiver Recv. receiveCount:%d, resp:%+v, err:%v", receiveCount, resp, recvErr)
		if recvErr == io.EOF {
			// The server closed the stream; this is not treated as an error.
			log.Errorf("GoogleTranscriberV2.TranscribeStream Receiver RecvEOF. receiveCount:%d", receiveCount)
			break recvLoop
		}
		if recvErr != nil {
			log.Errorf("GoogleTranscriberV2.TranscribeStream Receiver RecvFailed. receiveCount:%d, err:%v", receiveCount, recvErr)
			receiveErr = recvErr
			// If Recv failed because the sender cancelled the stream, surface the
			// sender's error, which is the actual cause.
			select {
			case sendErr := <-senderErrChan:
				if sendErr != nil {
					log.Errorf("GoogleTranscriberV2.TranscribeStream Receiver SenderFailed. err:%v", sendErr)
					receiveErr = sendErr
				}
			default:
			}
			break recvLoop
		}

		resultCount := 0
		for _, result := range resp.GetResults() {
			resultCount++
			transcript := ""
			if alternatives := result.GetAlternatives(); len(alternatives) > 0 && alternatives[0] != nil {
				transcript = alternatives[0].GetTranscript()
			}
			log.Infof("GoogleTranscriberV2.TranscribeStream Receiver RecvResult. receiveCount:%d, resultCount:%d, isFinal:%v, transcript:%+v",
				receiveCount, resultCount, result.GetIsFinal(), transcript)

			if result.GetIsFinal() {
				finalText = transcript
				if params.OnResult != nil {
					params.OnResult(finalText, true)
				}
				isComplete = true
				break recvLoop
			}

			// Interim result: forward it, throttled to one callback per
			// intermediateInterval. The zero lastInterimResultTime lets the first
			// one through.
			if params.OnResult != nil && time.Since(lastInterimResultTime) > intermediateInterval {
				params.OnResult(transcript, false)
				lastInterimResultTime = time.Now()
			}
		}
	}
	log.Infof("GoogleTranscriberV2.TranscribeStream Receiver Over. finalText:%s, isComplete:%v, recvErr:%v", finalText, isComplete, receiveErr)

	result := &ReceiverResult{
		Text:       finalText,
		IsComplete: isComplete,
		Err:        receiveErr,
	}
	log.Infof("GoogleTranscriberV2.TranscribeStream Result. result:%+v", result)

	// Report stream failures to the caller instead of returning an empty
	// transcript with a nil error.
	if result.Err != nil {
		return nil, result.Err
	}

	text := ""
	if result.IsComplete {
		text = result.Text
	}
	results := &transcriber.TranscribeResults{
		Text:     text,
		Duration: time.Since(start),
		Cost:     0, // TODO:
	}
	log.Infof("GoogleTranscriberV2.TranscribeStream End. results:%+v", results)
	return results, nil
}
go.mod
module myai

go 1.25.5

require (
	cloud.google.com/go/speech v1.28.1
	cloud.google.com/go/texttospeech v1.16.0
)

Expected behavior

While audio data is still being sent, interim (non-final) transcription results should be received.

Actual behavior

Only the final result was received; no interim results arrived.

Logs

2026-01-06T20:42:45.805+0800    INFO    GoogleTranscriberV2.TranscribeStream Begin. params:&{Audio:0x14000180cc0 Format:pcm SampleRate:16000 Locale:zh OnResult:0x1043659e0 PhraseHints:[5] CustomClasses:map[게임용어:[레벨업 경험치 퀘스트 던전 보스 아이템] 도메인특화:[구독 프리미엄 Soul 캐릭터 대화] 캐릭터별호칭:[여보 자기야 달링 허니 베이비]]}
2026-01-06T20:42:45.809+0800    INFO    GoogleTranscriberV2.TranscribeStream Connect. err:<nil>
2026-01-06T20:42:45.811+0800    INFO    GoogleTranscriberV2.TranscribeStream SendConfig. req:recognizer:"projects/ucloudlink-411610/locations/asia-northeast1/recognizers/zh"  streaming_config:{config:{explicit_decoding_config:{encoding:LINEAR16  sample_rate_hertz:16000  audio_channel_count:1}}  streaming_features:{interim_results:true}}, err:<nil>
2026-01-06T20:42:45.811+0800    INFO    GoogleTranscriberV2.TranscribeStream Receiver Start.
2026-01-06T20:42:45.811+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender Start.
2026-01-06T20:42:45.811+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:1, n:8192, err:<nil>
2026-01-06T20:42:45.811+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:1, n:8192, err:<nil>
2026-01-06T20:42:45.954+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:2, n:8192, err:<nil>
2026-01-06T20:42:45.954+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:2, n:8192, err:<nil>
2026-01-06T20:42:46.105+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:3, n:8192, err:<nil>
2026-01-06T20:42:46.105+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:3, n:8192, err:<nil>
2026-01-06T20:42:46.258+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:4, n:8192, err:<nil>
2026-01-06T20:42:46.259+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:4, n:8192, err:<nil>
2026-01-06T20:42:46.409+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:5, n:8192, err:<nil>
2026-01-06T20:42:46.409+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:5, n:8192, err:<nil>
2026-01-06T20:42:46.560+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:6, n:8192, err:<nil>
2026-01-06T20:42:46.560+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:6, n:8192, err:<nil>
2026-01-06T20:42:46.713+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:7, n:8192, err:<nil>
2026-01-06T20:42:46.714+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:7, n:8192, err:<nil>
2026-01-06T20:42:46.863+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:8, n:8192, err:<nil>
2026-01-06T20:42:46.863+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:8, n:8192, err:<nil>
2026-01-06T20:42:47.018+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:9, n:8192, err:<nil>
2026-01-06T20:42:47.019+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:9, n:8192, err:<nil>
2026-01-06T20:42:47.177+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:10, n:8192, err:<nil>
2026-01-06T20:42:47.177+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:10, n:8192, err:<nil>
2026-01-06T20:42:47.330+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:11, n:8192, err:<nil>
2026-01-06T20:42:47.330+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:11, n:8192, err:<nil>
2026-01-06T20:42:47.496+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:12, n:8192, err:<nil>
2026-01-06T20:42:47.496+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:12, n:8192, err:<nil>
2026-01-06T20:42:47.631+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:13, n:8192, err:<nil>
2026-01-06T20:42:47.631+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:13, n:8192, err:<nil>
2026-01-06T20:42:47.782+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:14, n:8192, err:<nil>
2026-01-06T20:42:47.782+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:14, n:8192, err:<nil>
2026-01-06T20:42:47.934+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:15, n:8192, err:<nil>
2026-01-06T20:42:47.935+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:15, n:8192, err:<nil>
2026-01-06T20:42:48.085+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:16, n:8192, err:<nil>
2026-01-06T20:42:48.085+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:16, n:8192, err:<nil>
2026-01-06T20:42:48.236+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:17, n:8192, err:<nil>
2026-01-06T20:42:48.236+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:17, n:8192, err:<nil>
2026-01-06T20:42:48.389+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:18, n:6744, err:<nil>
2026-01-06T20:42:48.389+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender SendAudio. sendCount:18, n:6744, err:<nil>
2026-01-06T20:42:48.540+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:19, n:0, err:<nil>
2026-01-06T20:42:48.540+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudio. sendCount:20, n:0, err:EOF
2026-01-06T20:42:48.540+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender CloseSend. err:<nil>
2026-01-06T20:42:48.540+0800    INFO    GoogleTranscriberV2.TranscribeStream Sender ReadAudioOver. sendCount:20
2026-01-06T20:42:51.439+0800    INFO    GoogleTranscriberV2.TranscribeStream Receiver Recv. receiveCount:1, resp:results:{alternatives:{transcript:"今年的最后一晚了,宝贝,要不要和我一起好好庆祝一下?"}  is_final:true  result_end_offset:{seconds:4  nanos:562749862}  language_code:"cmn-Hans-CN"}  metadata:{request_id:"69774bf6-0000-21e4-8aec-9898fbcda16d"  total_billed_duration:{seconds:5}}, err:<nil>
2026-01-06T20:42:51.439+0800    INFO    GoogleTranscriberV2.TranscribeStream Receiver RecvResult. receiveCount:1, resultCount:1, isFinal:true, transcript:今年的最后一晚了,宝贝,要不要和我一起好好庆祝一下?
2026-01-06T20:42:51.440+0800    INFO    GoogleTranscriberV2.TranscribeStream Receiver Over. finalText:今年的最后一晚了,宝贝,要不要和我一起好好庆祝一下?, isComplete:true, recvErr:<nil>
2026-01-06T20:42:51.440+0800    INFO    GoogleTranscriberV2.TranscribeStream Result. result:&{Text:今年的最后一晚了,宝贝,要不要和我一起好好庆祝一下? IsComplete:true Err:<nil>}
2026-01-06T20:42:51.440+0800    INFO    GoogleTranscriberV2.TranscribeStream End. results:&{Text:今年的最后一晚了,宝贝,要不要和我一起好好庆祝一下? Duration:5.634542958s Cost:0}

Metadata

Metadata

Assignees

Labels

api: speech — Issues related to the Speech-to-Text API. type: question — Request for information or clarification. Not an issue.

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions