Skip to content

Commit c30cec2

Browse files
Add multi-language and temperature support in js SDK (#398)
* Add multi-language and temperature support in the JS SDK
* Modify the audio input to match the chat client input
1 parent a59c286 commit c30cec2

File tree

2 files changed

+127
-2
lines changed

2 files changed

+127
-2
lines changed

sdk_v2/js/src/openai/audioClient.ts

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,50 @@
11
import { CoreInterop } from '../detail/coreInterop.js';
22

3+
/**
 * Optional, user-configurable settings that an audio client merges into each
 * request it sends.
 *
 * A setting that is left `undefined` is omitted from the serialized request
 * entirely (it is never emitted as `null` or as an empty value).
 */
export class AudioClientSettings {
  /**
   * Language of the audio, e.g. `'en'`.
   * NOTE(review): the set of accepted values is decided by the backend — the
   * OpenAI Audio API uses ISO-639-1 codes; confirm the same holds here.
   */
  language?: string;

  /**
   * Sampling temperature forwarded with the request.
   * NOTE(review): valid range is not enforced here — confirm against the backend.
   */
  temperature?: number;

  /**
   * Serializes the settings into an OpenAI-compatible request object.
   *
   * Each defined setting is written twice:
   * - as a top-level, PascalCase property (`Language`, `Temperature`) with its
   *   original type, and
   * - as a lower-case, string-valued entry of a `metadata` map (the
   *   Foundry-specific channel).
   *
   * `metadata` is only present when at least one setting is defined, so an
   * untouched settings object serializes to `{}`.
   *
   * @returns A plain object containing only the defined settings, in the key
   * order `Language`, `Temperature`, `metadata`.
   * @internal
   */
  _serialize(): Record<string, unknown> {
    const { language, temperature } = this;

    // Standard OpenAI properties.
    const request: Record<string, unknown> = {};
    // Foundry specific metadata properties (string values only).
    const metadata: Record<string, string> = {};

    // Compare against `undefined` explicitly: `0` and `''` are legitimate
    // values and must not be dropped by a truthiness check.
    if (language !== undefined) {
      request["Language"] = language;
      metadata["language"] = language;
    }
    if (temperature !== undefined) {
      request["Temperature"] = temperature;
      metadata["temperature"] = temperature.toString();
    }

    if (Object.keys(metadata).length > 0) {
      request["metadata"] = metadata;
    }

    return request;
  }
}
35+
336
/**
437
* Client for performing audio operations (transcription, translation) with a loaded model.
538
* Follows the OpenAI Audio API structure.
639
*/
740
export class AudioClient {
841
private modelId: string;
942
private coreInterop: CoreInterop;
43+
44+
/**
45+
* Configuration settings for audio operations.
46+
*/
47+
public settings = new AudioClientSettings();
1048

1149
constructor(modelId: string, coreInterop: CoreInterop) {
1250
this.modelId = modelId;
@@ -21,7 +59,8 @@ export class AudioClient {
2159
public async transcribe(audioFilePath: string): Promise<any> {
2260
const request = {
2361
Model: this.modelId,
24-
FileName: audioFilePath
62+
FileName: audioFilePath,
63+
...this.settings._serialize()
2564
};
2665

2766
const response = this.coreInterop.executeCommand("audio_transcribe", { Params: { OpenAICreateRequest: JSON.stringify(request) } });
@@ -37,7 +76,8 @@ export class AudioClient {
3776
public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void): Promise<void> {
3877
const request = {
3978
Model: this.modelId,
40-
FileName: audioFilePath
79+
FileName: audioFilePath,
80+
...this.settings._serialize()
4181
};
4282

4383
await this.coreInterop.executeCommandStreaming(

sdk_v2/js/test/openai/audioClient.test.ts

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,47 @@ describe('Audio Client Tests', () => {
2929
try {
3030
const audioClient = model.createAudioClient();
3131
expect(audioClient).to.not.be.undefined;
32+
33+
audioClient.settings.language = 'en';
34+
audioClient.settings.temperature = 0.0; // for deterministic results
35+
36+
const response = await audioClient.transcribe(AUDIO_FILE_PATH);
37+
38+
expect(response).to.not.be.undefined;
39+
expect(response.text).to.not.be.undefined;
40+
expect(response.text).to.be.a('string');
41+
expect(response.text.length).to.be.greaterThan(0);
42+
expect(response.text).to.equal(EXPECTED_TEXT);
43+
console.log(`Response: ${response.text}`);
44+
} finally {
45+
await model.unload();
46+
}
47+
});
48+
49+
it('should transcribe audio without streaming with temperature', async function() {
50+
this.timeout(30000);
51+
const manager = getTestManager();
52+
const catalog = manager.catalog;
53+
54+
const cachedModels = await catalog.getCachedModels();
55+
expect(cachedModels.length).to.be.greaterThan(0);
56+
57+
const cachedVariant = cachedModels.find(m => m.alias === WHISPER_MODEL_ALIAS);
58+
expect(cachedVariant, 'whisper-tiny should be cached').to.not.be.undefined;
59+
60+
const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
61+
expect(model).to.not.be.undefined;
62+
if (!model || !cachedVariant) return;
63+
64+
model.selectVariant(cachedVariant.id);
65+
await model.load();
66+
67+
try {
68+
const audioClient = model.createAudioClient();
69+
expect(audioClient).to.not.be.undefined;
70+
71+
audioClient.settings.language = 'en';
72+
audioClient.settings.temperature = 0.0; // for deterministic results
3273

3374
const response = await audioClient.transcribe(AUDIO_FILE_PATH);
3475

@@ -65,6 +106,50 @@ describe('Audio Client Tests', () => {
65106
const audioClient = model.createAudioClient();
66107
expect(audioClient).to.not.be.undefined;
67108

109+
audioClient.settings.language = 'en';
110+
audioClient.settings.temperature = 0.0; // for deterministic results
111+
112+
let fullResponse = '';
113+
await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
114+
expect(chunk).to.not.be.undefined;
115+
expect(chunk.text).to.not.be.undefined;
116+
expect(chunk.text).to.be.a('string');
117+
expect(chunk.text.length).to.be.greaterThan(0);
118+
fullResponse += chunk.text;
119+
});
120+
121+
console.log(`Full response: ${fullResponse}`);
122+
expect(fullResponse).to.equal(EXPECTED_TEXT);
123+
} finally {
124+
await model.unload();
125+
}
126+
});
127+
128+
it('should transcribe audio with streaming with temperature', async function() {
129+
this.timeout(30000);
130+
const manager = getTestManager();
131+
const catalog = manager.catalog;
132+
133+
const cachedModels = await catalog.getCachedModels();
134+
expect(cachedModels.length).to.be.greaterThan(0);
135+
136+
const cachedVariant = cachedModels.find(m => m.alias === WHISPER_MODEL_ALIAS);
137+
expect(cachedVariant, 'whisper-tiny should be cached').to.not.be.undefined;
138+
139+
const model = await catalog.getModel(WHISPER_MODEL_ALIAS);
140+
expect(model).to.not.be.undefined;
141+
if (!model || !cachedVariant) return;
142+
143+
model.selectVariant(cachedVariant.id);
144+
await model.load();
145+
146+
try {
147+
const audioClient = model.createAudioClient();
148+
expect(audioClient).to.not.be.undefined;
149+
150+
audioClient.settings.language = 'en';
151+
audioClient.settings.temperature = 0.0; // for deterministic results
152+
68153
let fullResponse = '';
69154
await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => {
70155
expect(chunk).to.not.be.undefined;

0 commit comments

Comments
 (0)