Skip to content

Commit acfa8ab

Browse files
Add dist/ and lib/ folders needed by Realtime translation example (#1769)
1 parent dbf884f commit acfa8ab

26 files changed

+1877
-2
lines changed

examples/voice_solutions/one_way_translation_using_realtime_api.mdx

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@ This cookbook demonstrates how to use OpenAI's [ Realtime API](https://platform.
77
A real-world use case for this demo is a multilingual, conversational translation where a speaker talks into the speaker app and listeners hear translations in their selected native language via the listener app. Imagine a conference room with a speaker talking in English and a participant with headphones in, choosing to listen to a Tagalog translation. Due to the current turn-based nature of audio models, the speaker must pause briefly to allow the model to process and translate speech. However, as models become faster and more efficient, this latency will decrease significantly and the translation will become more seamless.
88

99

10-
Let's explore the main functionalities and code snippets that illustrate how the app works. You can find the code in the [accompanying repo](https://github.com/openai/openai-cookbook/tree/main/examples/voice_solutions/one_way_translation_using_realtime_api/README.md
11-
) if you want to run the app locally.
10+
Let's explore the main functionalities and code snippets that illustrate how the app works. You can find the code in the [accompanying repo](https://github.com/openai/openai-cookbook/tree/main/examples/voice_solutions/one_way_translation_using_realtime_api) if you want to run the app locally.
1211

1312
## High Level Architecture Overview
1413

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import { WebSocketServer } from 'ws';
2+
import { RealtimeClient } from '@openai/realtime-api-beta';
3+
4+
/**
 * WebSocket relay that sits between a browser client and the OpenAI
 * Realtime API, forwarding events in both directions. Browser messages
 * that arrive before the upstream connection is ready are buffered and
 * flushed once connected.
 */
export class RealtimeRelay {
  /**
   * @param {string} apiKey OpenAI API key used for the upstream connection.
   */
  constructor(apiKey) {
    this.apiKey = apiKey;
    this.sockets = new WeakMap();
    this.wss = null;
  }

  /**
   * Starts the WebSocket server and begins accepting browser connections.
   * @param {number} port TCP port to listen on.
   */
  listen(port) {
    this.wss = new WebSocketServer({ port });
    this.wss.on('connection', this.connectionHandler.bind(this));
    this.log(`Listening on ws://localhost:${port}`);
  }

  /**
   * Handles a single browser connection: validates the request path, opens
   * an upstream Realtime API client, and wires up bidirectional relaying.
   * @param {import('ws').WebSocket} ws Browser-side socket.
   * @param {import('http').IncomingMessage} req Upgrade request.
   */
  async connectionHandler(ws, req) {
    // Guard: a URL is required to validate the path below.
    if (!req.url) {
      this.log('No URL provided, closing connection.');
      ws.close();
      return;
    }

    // Guard: only the root path is served by this relay.
    const { pathname } = new URL(req.url, `http://${req.headers.host}`);
    if (pathname !== '/') {
      this.log(`Invalid pathname: "${pathname}"`);
      ws.close();
      return;
    }

    // Instantiate new client
    this.log(`Connecting with key "${this.apiKey.slice(0, 3)}..."`);
    const client = new RealtimeClient({ apiKey: this.apiKey });

    // Relay: OpenAI Realtime API Event -> Browser Event
    client.realtime.on('server.*', (event) => {
      this.log(`Relaying "${event.type}" to Client`);
      ws.send(JSON.stringify(event));
    });
    client.realtime.on('close', () => ws.close());

    // Relay: Browser Event -> OpenAI Realtime API Event.
    // Messages that arrive before the upstream socket is up get queued.
    const pendingMessages = [];
    const forwardToOpenAI = (raw) => {
      try {
        const event = JSON.parse(raw);
        this.log(`Relaying "${event.type}" to OpenAI`);
        client.realtime.send(event.type, event);
      } catch (e) {
        console.error(e.message);
        this.log(`Error parsing event from client: ${raw}`);
      }
    };
    ws.on('message', (raw) => {
      client.isConnected() ? forwardToOpenAI(raw) : pendingMessages.push(raw);
    });
    ws.on('close', () => client.disconnect());

    // Connect to OpenAI Realtime API
    try {
      this.log(`Connecting to OpenAI...`);
      await client.connect();
    } catch (e) {
      this.log(`Error connecting to OpenAI: ${e.message}`);
      ws.close();
      return;
    }
    this.log(`Connected to OpenAI successfully!`);
    // Flush anything the browser sent while we were still connecting.
    while (pendingMessages.length) {
      forwardToOpenAI(pendingMessages.shift());
    }
  }

  /**
   * Prefixed console logger for all relay output.
   * @param {...any} args Values passed through to console.log.
   */
  log(...args) {
    console.log(`[RealtimeRelay]`, ...args);
  }
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Public entry point for the wavtools bundle: re-exports the audio
// analysis helper and the WAV packing / playback / recording classes
// from lib/ under a single module.
import { AudioAnalysis } from './lib/analysis/audio_analysis.js';
import { WavPacker } from './lib/wav_packer.js';
import { WavStreamPlayer } from './lib/wav_stream_player.js';
import { WavRecorder } from './lib/wav_recorder.js';
export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder };
//# sourceMappingURL=index.d.ts.map

examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/index.d.ts.map

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/**
 * Output of AudioAnalysis for the frequency domain of the audio
 * @typedef {Object} AudioAnalysisOutputType
 * @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive
 * @property {number[]} frequencies Raw frequency bucket values
 * @property {string[]} labels Labels for the frequency bucket values
 */
/**
 * Analyzes audio for visual output
 * @class
 */
export class AudioAnalysis {
    /**
     * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
     * returns human-readable formatting and labels
     * @param {AnalyserNode} analyser
     * @param {number} sampleRate
     * @param {Float32Array} [fftResult]
     * @param {"frequency"|"music"|"voice"} [analysisType]
     * @param {number} [minDecibels] default -100
     * @param {number} [maxDecibels] default -30
     * @returns {AudioAnalysisOutputType}
     */
    static getFrequencies(analyser: AnalyserNode, sampleRate: number, fftResult?: Float32Array, analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): AudioAnalysisOutputType;
    /**
     * Creates a new AudioAnalysis instance for an HTMLAudioElement
     * @param {HTMLAudioElement} audioElement
     * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
     * @returns {AudioAnalysis}
     */
    constructor(audioElement: HTMLAudioElement, audioBuffer?: AudioBuffer | null);
    // Pre-computed frequency-domain frames when an AudioBuffer was supplied
    // to the constructor (see the audioBuffer doc above).
    fftResults: any[];
    audio: HTMLAudioElement;
    // NOTE(review): the four fields below are emitted as `any` by the .d.ts
    // generator. From the constructor contract they are presumably an
    // AudioContext, an AnalyserNode, a number, and AudioBuffer|null
    // respectively — confirm against the JS implementation before tightening.
    context: any;
    analyser: any;
    sampleRate: any;
    audioBuffer: any;
    /**
     * Gets the current frequency domain data from the playing audio track
     * @param {"frequency"|"music"|"voice"} [analysisType]
     * @param {number} [minDecibels] default -100
     * @param {number} [maxDecibels] default -30
     * @returns {AudioAnalysisOutputType}
     */
    getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): AudioAnalysisOutputType;
    /**
     * Resume the internal AudioContext if it was suspended due to the lack of
     * user interaction when the AudioAnalysis was instantiated.
     * @returns {Promise<true>}
     */
    resumeIfSuspended(): Promise<true>;
}
53+
/**
 * Output of AudioAnalysis for the frequency domain of the audio
 */
export type AudioAnalysisOutputType = {
    /**
     * Amplitude of this frequency between {0, 1} inclusive
     */
    values: Float32Array;
    /**
     * Raw frequency bucket values
     */
    frequencies: number[];
    /**
     * Labels for the frequency bucket values
     */
    labels: string[];
};
//# sourceMappingURL=audio_analysis.d.ts.map

examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/audio_analysis.d.ts.map

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/**
 * All note frequencies from 1st to 8th octave
 * in format "A#8" (A#, 8th octave)
 */
export const noteFrequencies: any[];
// Display labels paired index-for-index with noteFrequencies.
export const noteFrequencyLabels: any[];
// Frequencies/labels restricted to the voice analysis range.
export const voiceFrequencies: any[];
export const voiceFrequencyLabels: any[];
// NOTE(review): all four constants are emitted as any[] by the .d.ts
// generator; presumably number[] for the *Frequencies arrays and
// string[] for the *Labels arrays — confirm against the JS source
// before tightening these declarations.
//# sourceMappingURL=constants.d.ts.map

examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/analysis/constants.d.ts.map

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/**
 * Raw wav audio file contents
 * @typedef {Object} WavPackerAudioType
 * @property {Blob} blob
 * @property {string} url
 * @property {number} channelCount
 * @property {number} sampleRate
 * @property {number} duration
 */
/**
 * Utility class for assembling PCM16 "audio/wav" data
 * @class
 */
export class WavPacker {
    /**
     * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
     * @param {Float32Array} float32Array
     * @returns {ArrayBuffer}
     */
    static floatTo16BitPCM(float32Array: Float32Array): ArrayBuffer;
    /**
     * Concatenates two ArrayBuffers
     * @param {ArrayBuffer} leftBuffer
     * @param {ArrayBuffer} rightBuffer
     * @returns {ArrayBuffer}
     */
    static mergeBuffers(leftBuffer: ArrayBuffer, rightBuffer: ArrayBuffer): ArrayBuffer;
    /**
     * Packs data into an Int16 format
     * @private
     * @param {number} size 0 = 1x Int16, 1 = 2x Int16
     * @param {number} arg value to pack
     * @returns
     */
    // NOTE(review): emitted untyped by the generator; per the JSDoc it is a
    // (size: number, arg: number) => unknown helper — confirm against source.
    private _packData;
    /**
     * Packs audio into "audio/wav" Blob
     * @param {number} sampleRate
     * @param {{bitsPerSample: number, channels: Array<Float32Array>, data: Int16Array}} audio
     * @returns {WavPackerAudioType}
     */
    pack(sampleRate: number, audio: {
        bitsPerSample: number;
        channels: Array<Float32Array>;
        data: Int16Array;
    }): WavPackerAudioType;
}
48+
/**
 * Raw wav audio file contents
 */
export type WavPackerAudioType = {
    /** Packed "audio/wav" file contents. */
    blob: Blob;
    /** URL referencing the blob — presumably an object URL; confirm against source. */
    url: string;
    channelCount: number;
    sampleRate: number;
    /** NOTE(review): units not stated by the generator; presumably seconds. */
    duration: number;
};
//# sourceMappingURL=wav_packer.d.ts.map

examples/voice_solutions/one_way_translation_using_realtime_api/src/lib/wavtools/dist/lib/wav_packer.d.ts.map

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)