Skip to content

Commit 7a43d09

Browse files
authored
Merge pull request #117 from AssemblyAI/61C6D6CA2C0CC6A8F8859B5BFCBA61A5
Sync from internal repo (YYYY/MM/DD)
2 parents 430b02f + 2564f16 commit 7a43d09

File tree

7 files changed

+94
-8
lines changed

7 files changed

+94
-8
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## [4.20.0]
4+
5+
- Add `on_low_language_confidence` property to `LanguageDetectionOptions`
6+
> Controls behavior when language confidence is below threshold. Either "error" (default) or "fallback".
7+
> When set to "fallback", the transcription will use the fallback language instead of erroring when confidence is low.
8+
39
## [4.8.0]
410

511
- Add `multichannel` property to `TranscriptParams`

docs/compat.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ If you do use an older version of Node.js like version 16, you'll need to polyfi
1313
To make the SDK compatible with the browser, the SDK aims to use web standards as much as possible.
1414
However, there are still incompatibilities between Node.js and the browser.
1515

16-
- `StreamingTranscriber` doesn't support the AssemblyAI API key in the browser.
17-
Instead, you have to generate a temporary auth token using `client.streaming.createTemporaryToken`, and pass in the resulting token to the streaming transcriber.
16+
- `RealtimeTranscriber` doesn't support the AssemblyAI API key in the browser.
17+
Instead, you have to generate a temporary auth token using `client.realtime.createTemporaryToken`, and pass in the resulting token to the real-time transcriber.
1818

1919
Generate a temporary auth token on the server.
2020

@@ -23,24 +23,24 @@ However, there are still incompatibilities between Node.js and the browser.
2323
// Ideally, to avoid embedding your API key client side,
2424
// you generate this token on the server, and pass it to the client via an API.
2525
const client = new AssemblyAI({ apiKey: "YOUR_API_KEY" });
26-
const token = await client.streaming.createTemporaryToken({ expires_in_seconds: 60 });
26+
const token = await client.realtime.createTemporaryToken({ expires_in: 480 });
2727
```
2828

2929
> [!NOTE]
3030
> We recommend generating the token on the server, so you don't embed your AssemblyAI API key in your client app.
3131
> If you embed the API key on the client, everyone can see it and use it for themselves.
3232
3333
Then pass the token via an API to the client.
34-
On the client, create an instance of `StreamingTranscriber` using the token.
34+
On the client, create an instance of `RealtimeTranscriber` using the token.
3535

3636
```js
37-
import { StreamingTranscriber } from "assemblyai";
37+
import { RealtimeTranscriber } from "assemblyai";
3838
// or the following if you're using UMD
39-
// const { StreamingTranscriber } = assemblyai;
39+
// const { RealtimeTranscriber } = assemblyai;
4040

4141
const token = getToken(); // getToken is a function for you to implement
4242

43-
const rt = new StreamingTranscriber({
43+
const rt = new RealtimeTranscriber({
4444
token: token,
4545
});
4646
```

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "assemblyai",
3-
"version": "4.19.0",
3+
"version": "4.20.0",
44
"description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.",
55
"engines": {
66
"node": ">=18"

src/services/streaming/service.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ export class StreamingTranscriber {
9797
);
9898
}
9999

100+
if (this.params.vadThreshold !== undefined) {
101+
searchParams.set("vad_threshold", this.params.vadThreshold.toString());
102+
}
103+
100104
if (this.params.formatTurns) {
101105
searchParams.set("format_turns", this.params.formatTurns.toString());
102106
}
@@ -128,6 +132,20 @@ export class StreamingTranscriber {
128132
searchParams.set("speech_model", this.params.speechModel.toString());
129133
}
130134

135+
if (this.params.languageDetection !== undefined) {
136+
searchParams.set(
137+
"language_detection",
138+
this.params.languageDetection.toString(),
139+
);
140+
}
141+
142+
if (this.params.inactivityTimeout !== undefined) {
143+
searchParams.set(
144+
"inactivity_timeout",
145+
this.params.inactivityTimeout.toString(),
146+
);
147+
}
148+
131149
url.search = searchParams.toString();
132150

133151
return url;

src/types/openapi.generated.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1690,6 +1690,10 @@ export type LanguageDetectionOptions = {
16901690
* The confidence threshold for the automatically detected code switching language.
16911691
*/
16921692
code_switching_confidence_threshold?: number | null;
1693+
/**
1694+
* Controls behavior when language confidence is below threshold. Either "error" (default) or "fallback".
1695+
*/
1696+
on_low_language_confidence?: string | null;
16931697
};
16941698

16951699
/**

src/types/streaming/index.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,14 @@ export type StreamingTranscriberParams = {
99
endOfTurnConfidenceThreshold?: number;
1010
minEndOfTurnSilenceWhenConfident?: number;
1111
maxTurnSilence?: number;
12+
vadThreshold?: number;
1213
formatTurns?: boolean;
1314
filterProfanity?: boolean;
1415
keyterms?: string[];
1516
keytermsPrompt?: string[];
1617
speechModel?: StreamingSpeechModel;
18+
languageDetection?: boolean;
19+
inactivityTimeout?: number;
1720
};
1821

1922
export type StreamingEvents = "open" | "close" | "turn" | "error";
@@ -54,6 +57,8 @@ export type TurnEvent = {
5457
transcript: string;
5558
end_of_turn_confidence: number;
5659
words: StreamingWord[];
60+
language_code?: string;
61+
language_confidence?: number;
5762
};
5863

5964
export type StreamingWord = {
@@ -79,6 +84,7 @@ export type StreamingUpdateConfiguration = {
7984
end_of_turn_confidence_threshold?: number;
8085
min_end_of_turn_silence_when_confident?: number;
8186
max_turn_silence?: number;
87+
vad_threshold?: number;
8288
format_turns?: boolean;
8389
};
8490

tests/unit/language-detection-options.test.ts

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,4 +143,56 @@ describe("language detection options", () => {
143143
const requestBody = JSON.parse(fetchMock.mock.calls[0][1]?.body as string);
144144
expect(requestBody.language_detection_options).toBe(null);
145145
});
146+
147+
it("should create transcript with on_low_language_confidence set to fallback", async () => {
148+
const languageDetectionOptions: LanguageDetectionOptions = {
149+
fallback_language: "en",
150+
on_low_language_confidence: "fallback",
151+
};
152+
153+
fetchMock.doMockOnceIf(
154+
requestMatches({ url: "/v2/transcript", method: "POST" }),
155+
JSON.stringify({ id: transcriptId, status: "queued" }),
156+
);
157+
158+
const transcript = await assembly.transcripts.submit({
159+
audio_url: remoteAudioURL,
160+
language_detection: true,
161+
language_confidence_threshold: 0.8,
162+
language_detection_options: languageDetectionOptions,
163+
});
164+
165+
expect(transcript.id).toBe(transcriptId);
166+
167+
const requestBody = JSON.parse(fetchMock.mock.calls[0][1]?.body as string);
168+
expect(requestBody.language_confidence_threshold).toBe(0.8);
169+
expect(requestBody.language_detection_options.fallback_language).toBe("en");
170+
expect(requestBody.language_detection_options.on_low_language_confidence).toBe("fallback");
171+
});
172+
173+
it("should create transcript with on_low_language_confidence set to error", async () => {
174+
const languageDetectionOptions: LanguageDetectionOptions = {
175+
fallback_language: "en",
176+
on_low_language_confidence: "error",
177+
};
178+
179+
fetchMock.doMockOnceIf(
180+
requestMatches({ url: "/v2/transcript", method: "POST" }),
181+
JSON.stringify({ id: transcriptId, status: "queued" }),
182+
);
183+
184+
const transcript = await assembly.transcripts.submit({
185+
audio_url: remoteAudioURL,
186+
language_detection: true,
187+
language_confidence_threshold: 0.7,
188+
language_detection_options: languageDetectionOptions,
189+
});
190+
191+
expect(transcript.id).toBe(transcriptId);
192+
193+
const requestBody = JSON.parse(fetchMock.mock.calls[0][1]?.body as string);
194+
expect(requestBody.language_confidence_threshold).toBe(0.7);
195+
expect(requestBody.language_detection_options.fallback_language).toBe("en");
196+
expect(requestBody.language_detection_options.on_low_language_confidence).toBe("error");
197+
});
146198
});

0 commit comments

Comments
 (0)