Skip to content

Commit 88cc52f

Browse files
author
Guiners
committed
adding samples, test, lints
1 parent 8ab17d8 commit 88cc52f

File tree

3 files changed

+189
-0
lines changed

3 files changed

+189
-0
lines changed
95.4 KB
Binary file not shown.
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// [START googlegenaisdk_live_conversation_audio_with_audio]
16+
17+
'use strict';
18+
19+
const fs = require('fs');
20+
const path = require('path');
21+
const {GoogleGenAI, Modality} = require('@google/genai');
22+
23+
// Model ID passed to client.live.connect.
const MODEL = 'gemini-2.0-flash-live-preview-04-09';
24+
// Sample rate advertised in the audio/pcm MIME type of the audio sent to the model.
const INPUT_RATE = 16000;
25+
// Default sample rate written into the WAV header of the saved model response.
const OUTPUT_RATE = 24000;
26+
// Bytes per sample; feeds the byte-rate and block-align fields of the WAV header.
const SAMPLE_WIDTH = 2; // 16-bit
27+
28+
// Default project for generateContent, read from the environment.
const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT;
29+
// Default location; falls back to 'global' when the variable is unset or empty.
const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION || 'global';
30+
31+
/**
 * Reads a WAV file and returns its PCM payload, base64-encoded.
 *
 * The RIFF chunk list is walked to locate the `data` chunk, so files whose
 * header is not exactly 44 bytes (extra `LIST` metadata, an extended `fmt `
 * chunk, ...) do not leak header bytes into the audio payload. When the file
 * is not a RIFF/WAVE container, or no `data` chunk is found, the first
 * 44 bytes are skipped instead.
 *
 * @param {string} filepath - Path of the WAV file to read.
 * @returns {{base64Data: string, mimeType: string}} The base64 PCM bytes and
 *     an `audio/pcm` MIME type tagged with INPUT_RATE.
 */
function readWavefile(filepath) {
  const CANONICAL_HEADER_SIZE = 44;
  const RIFF_PREAMBLE_SIZE = 12; // 'RIFF' + size + 'WAVE'
  const CHUNK_HEADER_SIZE = 8; // 4-byte id + 4-byte little-endian size

  const buffer = fs.readFileSync(filepath);

  // Fallback used when the chunk walk below does not find a `data` chunk.
  let audioBytes = buffer.subarray(CANONICAL_HEADER_SIZE);

  const isRiffWave =
    buffer.length >= RIFF_PREAMBLE_SIZE &&
    buffer.toString('latin1', 0, 4) === 'RIFF' &&
    buffer.toString('latin1', 8, 12) === 'WAVE';

  if (isRiffWave) {
    let offset = RIFF_PREAMBLE_SIZE;
    while (offset + CHUNK_HEADER_SIZE <= buffer.length) {
      const chunkId = buffer.toString('latin1', offset, offset + 4);
      const chunkSize = buffer.readUInt32LE(offset + 4);
      const chunkStart = offset + CHUNK_HEADER_SIZE;

      if (chunkId === 'data') {
        // Clamp so a declared size larger than the file cannot overrun it.
        const chunkEnd = Math.min(chunkStart + chunkSize, buffer.length);
        audioBytes = buffer.subarray(chunkStart, chunkEnd);
        break;
      }

      // Chunks are word-aligned: an odd-sized chunk is followed by a pad byte.
      offset = chunkStart + chunkSize + (chunkSize % 2);
    }
  }

  return {
    base64Data: audioBytes.toString('base64'),
    mimeType: `audio/pcm;rate=${INPUT_RATE}`,
  };
}
38+
39+
// Utility: write bytes -> .wav file
40+
/**
 * Concatenates raw PCM chunks and writes them to `filepath` behind a 44-byte
 * RIFF/WAVE header (format tag 1, one channel).
 *
 * Bits-per-sample, block-align and byte-rate are all derived from
 * SAMPLE_WIDTH so the header stays self-consistent if that constant changes.
 *
 * @param {string} filepath - Destination path; overwritten if it exists.
 * @param {Array<Buffer|Uint8Array>} audioFrames - PCM chunks, in playback order.
 * @param {number} [rate=OUTPUT_RATE] - Sample rate written into the header.
 * @returns {void}
 */
function writeWavefile(filepath, audioFrames, rate = OUTPUT_RATE) {
  const HEADER_SIZE = 44;
  const FMT_CHUNK_SIZE = 16;
  const FORMAT_PCM = 1;
  const NUM_CHANNELS = 1;

  const rawAudioBytes = Buffer.concat(audioFrames);
  const blockAlign = NUM_CHANNELS * SAMPLE_WIDTH;
  const bitsPerSample = SAMPLE_WIDTH * 8;

  const header = Buffer.alloc(HEADER_SIZE);
  header.write('RIFF', 0);
  // RIFF size counts everything after this 4-byte field.
  header.writeUInt32LE(HEADER_SIZE - 8 + rawAudioBytes.length, 4);
  header.write('WAVE', 8);
  header.write('fmt ', 12);
  header.writeUInt32LE(FMT_CHUNK_SIZE, 16);
  header.writeUInt16LE(FORMAT_PCM, 20);
  header.writeUInt16LE(NUM_CHANNELS, 22);
  header.writeUInt32LE(rate, 24);
  header.writeUInt32LE(rate * blockAlign, 28); // byte rate
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitsPerSample, 34);
  header.write('data', 36);
  header.writeUInt32LE(rawAudioBytes.length, 40);

  fs.writeFileSync(filepath, Buffer.concat([header, rawAudioBytes]));
  console.log(`Model response saved to ${filepath}`);
}
60+
61+
/**
 * Runs one audio-in/audio-out turn against the Live API: opens a session,
 * sends `hello_gemini_are_you_there.wav` from this directory, collects the
 * audio parts of the model's reply until the turn completes, and writes them
 * to `example_model_response.wav` in this directory.
 *
 * @param {string} [projectId=GOOGLE_CLOUD_PROJECT] - Project passed to the
 *     GoogleGenAI client.
 * @param {string} [location=GOOGLE_CLOUD_LOCATION] - Location passed to the
 *     GoogleGenAI client.
 * @returns {Promise<Buffer[]>} Decoded audio chunks received during the
 *     model's turn (empty when the model sent no audio).
 * @throws {Error} If the session closes before the turn completes.
 */
async function generateContent(
  projectId = GOOGLE_CLOUD_PROJECT,
  location = GOOGLE_CLOUD_LOCATION
) {
  console.log('Starting audio conversation sample...');
  console.log(`Project: ${projectId}, Location: ${location}`);

  const client = new GoogleGenAI({
    vertexai: true,
    project: projectId,
    location: location,
  });

  // Filled by the onmessage callback, drained by handleTurn via waitMessage.
  const responseQueue = [];
  let sessionClosed = false;
  let lastErrorMessage = '';

  // Returns the next queued message, polling every 100 ms. Throws once the
  // session has closed with nothing left to read, instead of polling forever.
  async function waitMessage() {
    while (responseQueue.length === 0) {
      if (sessionClosed) {
        const detail = lastErrorMessage ? `: ${lastErrorMessage}` : '';
        throw new Error(
          `Live session closed before the turn completed${detail}`
        );
      }
      await new Promise(resolve => setTimeout(resolve, 100));
    }
    return responseQueue.shift();
  }

  // Consumes messages until one is flagged turnComplete, logging any
  // transcriptions and collecting the decoded inline audio of every part.
  async function handleTurn() {
    const audioFrames = [];
    let done = false;

    while (!done) {
      const message = await waitMessage();
      const sc = message.serverContent;
      if (!sc) {
        // Nothing to collect from messages that carry no server content.
        continue;
      }

      if (sc.inputTranscription) {
        console.log('Input transcription', sc.inputTranscription);
      }
      if (sc.outputTranscription) {
        console.log('Output transcription', sc.outputTranscription);
      }

      // modelTurn is absent on messages that only carry transcriptions or
      // the turnComplete flag, and a part may have no inline data.
      const parts = (sc.modelTurn && sc.modelTurn.parts) || [];
      for (const part of parts) {
        const data = part.inlineData && part.inlineData.data;
        if (data) {
          audioFrames.push(Buffer.from(data, 'base64'));
        }
      }

      if (sc.turnComplete) {
        done = true;
      }
    }

    return audioFrames;
  }

  const session = await client.live.connect({
    model: MODEL,
    config: {
      responseModalities: [Modality.AUDIO],
      inputAudioTranscription: {},
      outputAudioTranscription: {},
    },
    callbacks: {
      onmessage: msg => responseQueue.push(msg),
      onerror: e => {
        lastErrorMessage = e.message;
        console.error(e.message);
      },
      onclose: () => {
        sessionClosed = true;
        console.log('Closed');
      },
    },
  });

  try {
    const wavFilePath = path.join(__dirname, 'hello_gemini_are_you_there.wav');
    console.log('Reading file:', wavFilePath);

    // readWavefile already returns base64, so it is sent as-is.
    const {base64Data, mimeType} = readWavefile(wavFilePath);

    await session.sendRealtimeInput({
      media: {
        data: base64Data,
        mimeType: mimeType,
      },
    });

    console.log('Audio sent, waiting for response...');

    const audioFrames = await handleTurn();
    if (audioFrames.length > 0) {
      writeWavefile(
        path.join(__dirname, 'example_model_response.wav'),
        audioFrames,
        OUTPUT_RATE
      );
    }

    return audioFrames;
  } finally {
    // Close the session even when sending or receiving fails.
    await session.close();
  }
}
154+
155+
// [END googlegenaisdk_live_conversation_audio_with_audio]
156+
157+
module.exports = {
158+
generateContent,
159+
};
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
'use strict';
16+
17+
const {assert} = require('chai');
18+
const {describe, it} = require('mocha');
19+
20+
// Project ID for the test run. When CAIP_PROJECT_ID is unset this is
// undefined, so generateContent falls back to its own default project.
const projectId = process.env.CAIP_PROJECT_ID;
21+
const sample = require('../live/live-conversation-audio-with-audio');
22+
23+
describe('live-conversation-audio-with-audio', () => {
  // Runs the sample end to end (it opens a live session), hence the long timeout.
  it('should return audio frames for an audio prompt', async function () {
    this.timeout(180000);
    const output = await sample.generateContent(projectId);
    // Log a summary rather than the raw Buffers, which would flood the test log.
    console.log('Generated output:', `${output.length} audio chunk(s)`);
    assert(
      output.length > 0,
      'expected at least one audio chunk from the model'
    );
  });
});

0 commit comments

Comments
 (0)