GoogleCloudPlatform · cfloress · Nov 17, 2025 · Nov 17, 2025
@@ -4,6 +4,8 @@ go 1.24.0
 
 require (
 	github.com/GoogleCloudPlatform/golang-samples v0.0.0-20250201051611-5fb145d1e974
+	github.com/go-audio/audio v1.0.0
+	github.com/go-audio/wav v1.1.0
 	golang.org/x/oauth2 v0.25.0
 	google.golang.org/genai v1.17.0
 )
@@ -25,6 +27,7 @@ require (
 	github.com/envoyproxy/go-control-plane/envoy v1.32.3 // indirect
 	github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
+	github.com/go-audio/riff v1.0.0 // indirect
 	github.com/go-logr/logr v1.4.2 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/google/go-cmp v0.6.0 // indirect

@@ -46,6 +46,12 @@ github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6
 github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4=
 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
+github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
+github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
+github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
+github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
+github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
 github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=

@@ -0,0 +1,192 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package live shows how to use the GenAI SDK to generate text with live resources.
+package live
+
+// [START googlegenaisdk_live_conversation_audio_with_audio]
+import (
+	"context"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/go-audio/audio"
+	"github.com/go-audio/wav"
+	"google.golang.org/genai"
+)
+
+// generateLiveAudioConversation demonstrates two-way audio interaction with a Gemini model using live streaming.
+//
+// It loads the WAV file at audioFilePath, sends its PCM samples to the model
+// over a live session, writes the input and output transcriptions to w, and
+// saves any audio the model returns to model_response.wav. An error is
+// returned if any of those steps fails.
+func generateLiveAudioConversation(w io.Writer, audioFilePath string) error {
+	// Sample rate, in Hz, used when encoding the model's audio reply as WAV.
+	const outputSampleRate = 24000
+
+	ctx := context.Background()
+
+	client, err := genai.NewClient(ctx, &genai.ClientConfig{
+		HTTPOptions: genai.HTTPOptions{
+			APIVersion: "v1beta1",
+		},
+	})
+	if err != nil {
+		return fmt.Errorf("failed to create genai client: %w", err)
+	}
+
+	modelName := "gemini-live-2.5-flash-preview-native-audio-09-2025"
+
+	// Configure model to receive and respond with audio, including transcriptions.
+	config := &genai.LiveConnectConfig{
+		ResponseModalities:       []genai.Modality{genai.ModalityAudio},
+		InputAudioTranscription:  &genai.AudioTranscriptionConfig{},
+		OutputAudioTranscription: &genai.AudioTranscriptionConfig{},
+	}
+
+	session, err := client.Live.Connect(ctx, modelName, config)
+	if err != nil {
+		return fmt.Errorf("failed to connect live: %w", err)
+	}
+	defer session.Close()
+
+	// Load the audio file
+	audioBytes, mimeType, err := loadAudioAsPCMBytes(audioFilePath)
+	if err != nil {
+		return fmt.Errorf("failed to load audio: %w", err)
+	}
+
+	fmt.Fprintf(w, "> Streaming audio from %s to the model\n\n", audioFilePath)
+
+	// Send audio data to the model
+	err = session.SendRealtimeInput(genai.LiveSendRealtimeInputParameters{
+		Media: &genai.Blob{
+			Data:     audioBytes,
+			MIMEType: mimeType,
+		},
+	})
+	if err != nil {
+		return fmt.Errorf("failed to send realtime input: %w", err)
+	}
+
+	// Gather audio response frames
+	var audioFrames [][]byte
+
+	for {
+		chunk, err := session.Receive()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return fmt.Errorf("error receiving response: %w", err)
+		}
+
+		content := chunk.ServerContent
+		if content == nil {
+			// Messages without server content carry nothing to print or save.
+			continue
+		}
+
+		if content.InputTranscription != nil {
+			fmt.Fprintf(w, "Input transcription: %s\n", content.InputTranscription.Text)
+		}
+		if content.OutputTranscription != nil {
+			fmt.Fprintf(w, "Output transcription: %s\n", content.OutputTranscription.Text)
+		}
+		if content.ModelTurn != nil {
+			for _, part := range content.ModelTurn.Parts {
+				if part.InlineData != nil && len(part.InlineData.Data) > 0 {
+					audioFrames = append(audioFrames, part.InlineData.Data)
+				}
+			}
+		}
+
+		// Stop reading once the model signals the end of its turn. Without
+		// this the loop would keep waiting on Receive for messages that are
+		// not coming, and the collected audio would never be written.
+		if content.TurnComplete {
+			break
+		}
+	}
+
+	// Save audio frames to WAV file if available
+	if len(audioFrames) > 0 {
+		outputFile := "model_response.wav"
+		err := saveAudioFramesAsWAV(outputFile, audioFrames, outputSampleRate)
+		if err != nil {
+			return fmt.Errorf("failed to write WAV: %w", err)
+		}
+		fmt.Fprintf(w, "Model response saved to %s\n", outputFile)
+	}
+
+	// Example output (the transcription text depends on the audio and the model):
+	// > Streaming audio from <audioFilePath> to the model
+	//
+	// Input transcription: Hello.
+	// Output transcription: Hi
+	// Output transcription:  there. What can I do for you today?
+	// Model response saved to model_response.wav
+	return nil
+}
+
+// loadAudioAsPCMBytes reads a WAV file and returns PCM bytes with a MIME type.
+//
+// The decoded samples are re-encoded as little-endian 16-bit values and the
+// MIME type has the form "audio/pcm;rate=<sample rate of the file>". Only
+// 16-bit WAV input is accepted: samples are narrowed to int16, so any other
+// bit depth would be silently corrupted. An error is returned when the file
+// cannot be opened, is not a valid 16-bit WAV file, or cannot be decoded.
+func loadAudioAsPCMBytes(path string) ([]byte, string, error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return nil, "", fmt.Errorf("failed to open WAV file: %w", err)
+	}
+	defer file.Close()
+
+	wavDecoder := wav.NewDecoder(file)
+	if !wavDecoder.IsValidFile() {
+		return nil, "", fmt.Errorf("invalid WAV file")
+	}
+	// Reject other bit depths up front instead of truncating their samples
+	// to 16 bits in the conversion loop below.
+	if wavDecoder.BitDepth != 16 {
+		return nil, "", fmt.Errorf("unsupported WAV bit depth %d: only 16-bit PCM is supported", wavDecoder.BitDepth)
+	}
+
+	buf, err := wavDecoder.FullPCMBuffer()
+	if err != nil {
+		return nil, "", fmt.Errorf("failed to decode WAV: %w", err)
+	}
+
+	sampleRate := wavDecoder.SampleRate
+	rawInts := buf.Data
+	data := make([]byte, len(rawInts)*2) // 16-bit PCM
+
+	for i, sample := range rawInts {
+		// Convert through int16 so negative samples keep their two's-complement bits.
+		binary.LittleEndian.PutUint16(data[i*2:], uint16(int16(sample)))
+	}
+
+	mimeType := fmt.Sprintf("audio/pcm;rate=%d", sampleRate)
+	return data, mimeType, nil
+}
+
+// saveAudioFramesAsWAV writes audio frames (PCM bytes) to a WAV file.
+//
+// The frames are concatenated in order and read as little-endian 16-bit
+// samples, then encoded as a single-channel, 16-bit PCM WAV file at
+// sampleRate and written to filePath. A trailing byte that does not complete
+// a sample is dropped. An error is returned if the file cannot be created,
+// written, finalized, or closed.
+func saveAudioFramesAsWAV(filePath string, frames [][]byte, sampleRate int) error {
+	// Size the buffer once so concatenation does not reallocate per frame.
+	total := 0
+	for _, f := range frames {
+		total += len(f)
+	}
+	audioData := make([]byte, 0, total)
+	for _, f := range frames {
+		audioData = append(audioData, f...)
+	}
+
+	// Integer division ignores a dangling odd byte; indexing by sample rather
+	// than by byte keeps every read inside audioData.
+	numSamples := len(audioData) / 2
+	intData := audio.IntBuffer{
+		Format: &audio.Format{NumChannels: 1, SampleRate: sampleRate},
+		Data:   make([]int, numSamples),
+	}
+
+	for i := 0; i < numSamples; i++ {
+		// Convert through int16 so the sign of each sample is preserved.
+		intData.Data[i] = int(int16(binary.LittleEndian.Uint16(audioData[i*2:])))
+	}
+
+	file, err := os.Create(filePath)
+	if err != nil {
+		return fmt.Errorf("failed to create WAV file: %w", err)
+	}
+
+	wavEncoder := wav.NewEncoder(file, sampleRate, 16, 1, 1)
+	if err := wavEncoder.Write(&intData); err != nil {
+		file.Close() // best effort; the write error is the one worth reporting
+		return fmt.Errorf("failed to write audio data: %w", err)
+	}
+
+	if err := wavEncoder.Close(); err != nil {
+		file.Close() // best effort; the encoder error is the one worth reporting
+		return fmt.Errorf("failed to finalize WAV file: %w", err)
+	}
+
+	// A failed close can mean the data never reached disk, so report it.
+	if err := file.Close(); err != nil {
+		return fmt.Errorf("failed to close WAV file: %w", err)
+	}
+
+	return nil
+}
+
+// [END googlegenaisdk_live_conversation_audio_with_audio]
@@ -49,6 +49,27 @@ func generateStructuredOutputWithTxtMock(w io.Writer) error {
 	_, err = fmt.Fprintln(w, string(b))
 	return err
 }
+func generateLiveRAGWithTextMock(w io.Writer, memoryCorpus string) error {
+	mockOutput := "> What are the newest Gemini models?\n\nGemini 2.0 Flash and Gemini 2.5 Ultra are among the latest models released by Google."
+	_, err := fmt.Fprintln(w, mockOutput)
+	return err
+}
+
+// Mock function simulating generateLiveTextWithAudio without API/WebSocket.
+func generateLiveTextWithAudioMock(w io.Writer) error {
+	audioURL := "https://storage.googleapis.com/generativeai-downloads/data/16000.wav"
+	mockResponse := fmt.Sprintf("> Answer to this audio url: %s\n\nMocked transcript response: Hello from mock!", audioURL)
+	_, err := fmt.Fprintln(w, mockResponse)
+	return err
+}
+
+// generateLiveAudioConversationMock stands in for generateLiveAudioConversation:
+// it writes the given audio file name and a fixed reply, followed by a newline,
+// to w.
+func generateLiveAudioConversationMock(w io.Writer, audioFile string) error {
+	reply := fmt.Sprintf("> Received audio file: %s\nProcessed mock response: Hello from mock audio!", audioFile)
+	if _, err := fmt.Fprintln(w, reply); err != nil {
+		return err
+	}
+	return nil
+}
 
 func TestLiveGeneration(t *testing.T) {
 	tc := testutil.SystemTest(t)
@@ -96,4 +117,41 @@ func TestLiveGeneration(t *testing.T) {
 		}
 	})
 
+	// The RAG mock must succeed and write something to the shared buffer.
+	t.Run("generate RAG with txt", func(t *testing.T) {
+		buf.Reset()
+		err := generateLiveRAGWithTextMock(buf, "test")
+		if err != nil {
+			t.Fatalf("generateLiveRAGWithText failed: %v", err)
+		}
+
+		if got := buf.String(); got == "" {
+			t.Error("expected non-empty output, got empty")
+		}
+	})
+
+	// The text-with-audio mock must succeed and write something to the shared
+	// buffer. The subtest has its own name so it does not collide with the RAG
+	// subtest and can be selected on its own with -run.
+	t.Run("generate text with audio", func(t *testing.T) {
+		buf.Reset()
+		if err := generateLiveTextWithAudioMock(buf); err != nil {
+			t.Fatalf("generateLiveTextWithAudio failed: %v", err)
+		}
+
+		output := buf.String()
+		if output == "" {
+			t.Error("expected non-empty output, got empty")
+		}
+	})
+
+	// The audio-conversation mock must succeed and write something to the
+	// shared buffer.
+	t.Run("generate live audio conversation", func(t *testing.T) {
+		buf.Reset()
+		if err := generateLiveAudioConversationMock(buf, "sample_audio.wav"); err != nil {
+			t.Fatalf("generateLiveAudioConversation failed: %v", err)
+		}
+
+		if got := buf.String(); got == "" {
+			t.Error("expected non-empty output, got empty")
+		}
+	})
+
 }