Skip to content

Commit 66bbc30

Browse files
committed
feat(chat): add webspeech speech-to-text implementation
1 parent 10fe0f2 commit 66bbc30

File tree

6 files changed

+209
-67
lines changed

6 files changed

+209
-67
lines changed

src/components/chat/chat-input.ts

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ import { bindIf, hasFiles, isEmpty, trimmedHtml } from '../common/util.js';
1515
import IgcIconComponent from '../icon/icon.js';
1616
import IgcTextareaComponent from '../textarea/textarea.js';
1717
import type { ChatState } from './chat-state.js';
18-
import { SttClient } from './extras/stt-client.js';
18+
import { BackendSttClient } from './extras/stt-client-backend.js';
19+
import type { ISttClient } from './extras/stt-client-base.js';
20+
import { WebSpeechSttClient } from './extras/stt-client-webspeech.js';
1921
import { styles } from './themes/input.base.css.js';
2022
import { all } from './themes/input.js';
2123
import { styles as shared } from './themes/shared/input/input.common.css.js';
@@ -113,7 +115,7 @@ export default class IgcChatInputComponent extends LitElement {
113115
return this._state.acceptedFileTypes;
114116
}
115117

116-
private _sttClient?: SttClient;
118+
private _sttClient?: ISttClient;
117119

118120
@property()
119121
isRecording = false;
@@ -175,20 +177,34 @@ export default class IgcChatInputComponent extends LitElement {
175177

176178
/**
 * Toggles speech-to-text capture for the chat input.
 *
 * While a recording is active, the current client is asked to stop.
 * Otherwise a client is created from `options.speechToText` and started:
 * - `serviceProvider === 'webspeech'` creates a `WebSpeechSttClient`;
 * - `serviceProvider === 'backend'` with a non-empty `serviceUri` creates a
 *   `BackendSttClient` constructed with that URI.
 *
 * When neither case applies, nothing is started and the recording flags are
 * left untouched.
 */
async _toggleMic() {
  if (this.isRecording) {
    this._sttClient?.stop();
    return;
  }

  const stt = this._state.options?.speechToText;
  let client: ISttClient | undefined;

  if (stt?.serviceProvider === 'webspeech') {
    client = new WebSpeechSttClient(
      this.onPulseSignal,
      this.onStartCountdown,
      this.onTranscript,
      this.onStopInProgress,
      this.onFinishedTranscribing
    );
  } else if (stt?.serviceProvider === 'backend' && stt.serviceUri) {
    client = new BackendSttClient(
      stt.serviceUri,
      this.onPulseSignal,
      this.onStartCountdown,
      this.onTranscript,
      this.onStopInProgress,
      this.onFinishedTranscribing
    );
  }

  // Always replace the previous client so that a stale instance from an
  // earlier session is never restarted when the configuration is invalid.
  this._sttClient = client;

  if (!client) {
    // No usable speech-to-text configuration: do not enter the recording state.
    return;
  }

  await client.start(stt?.lang);
  this.isRecording = true;
  this.isStopInProgress = false;
}
@@ -401,7 +417,7 @@ export default class IgcChatInputComponent extends LitElement {
401417
}
402418

403419
private _renderSpeechToTextButton() {
404-
const sttEnabled = this._state.options?.sttOptions?.enable;
420+
const sttEnabled = this._state.options?.speechToText?.enable;
405421

406422
return html`${cache(
407423
sttEnabled
@@ -419,12 +435,12 @@ export default class IgcChatInputComponent extends LitElement {
419435
${this.isRecording && !this.isStopInProgress
420436
? html`
421437
<svg class="countdown-ring" viewBox="0 0 36 36">
422-
<circle class="ring-bg" cx="18" cy="18" r="16"></circle>
438+
<circle class="ring-bg" cx="18" cy="18" r="14"></circle>
423439
<circle
424440
class="ring-progress"
425441
cx="18"
426442
cy="18"
427-
r="16"
443+
r="14"
428444
></circle>
429445
</svg>
430446
`

src/components/chat/extras/stt-client.ts renamed to src/components/chat/extras/stt-client-backend.ts

Lines changed: 32 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,36 @@
11
import * as signalR from '@microsoft/signalr';
2+
import { BaseSttClient } from './stt-client-base.js';
23

34
const HUB_TRANSCRIBE_AUDIO_CHUNK = 'TranscribeAudioChunk';
45
const HUB_RECEIVE_TRANSCRIPT = 'ReceiveTranscript';
56
const HUB_COMPLETE_TRANSCRIBE = 'FinalizeTranscription';
6-
const SILENCE_TIMEOUT_MS = 4000;
7-
const SILENCE_GRACE_PERIOD = 1000;
87

9-
export class SttClient {
8+
export class BackendSttClient extends BaseSttClient {
109
private hubConnection?: signalR.HubConnection;
1110
private mediaRecorder?: MediaRecorder;
12-
private isRecording = false;
1311
private isStopInProgress = false;
1412
private isStopCompleted = false;
1513
private stopHubTimeout: any;
16-
private silenceTimeout: any;
17-
private silenceGraceTimeout: any;
18-
private isAutoFinished = false;
19-
private isCountdownRunning = false;
2014

2115
constructor(
22-
private hubUrl: string,
23-
private token: string,
24-
private onPulseSignal: () => void,
25-
private onStartCountdown: (ms: number | null) => void,
26-
private onTranscript: (text: string) => void,
27-
private onStopInProgress: () => void,
28-
private onFinishedTranscribing: (finish: string) => void
29-
) {}
16+
hubUrl: string,
17+
onPulseSignal: () => void,
18+
onStartCountdown: (ms: number | null) => void,
19+
onTranscript: (text: string) => void,
20+
onStopInProgress: () => void,
21+
onFinishedTranscribing: (finish: 'auto' | 'manual') => void
22+
) {
23+
super(
24+
onPulseSignal,
25+
onStartCountdown,
26+
onTranscript,
27+
onStopInProgress,
28+
onFinishedTranscribing
29+
);
30+
this.hubUrl = hubUrl;
31+
}
32+
33+
private hubUrl: string;
3034

3135
async start(language = 'en-US') {
3236
if (this.isRecording) {
@@ -103,11 +107,16 @@ export class SttClient {
103107
) {
104108
const buffer = await data.arrayBuffer();
105109
const base64Audio = btoa(String.fromCharCode(...new Uint8Array(buffer)));
106-
await this.hubConnection.invoke(
107-
HUB_TRANSCRIBE_AUDIO_CHUNK,
108-
base64Audio,
109-
language
110-
);
110+
try {
111+
await this.hubConnection.invoke(
112+
HUB_TRANSCRIBE_AUDIO_CHUNK,
113+
base64Audio,
114+
language
115+
);
116+
} catch {
117+
// TODO: report the failure, e.g. report.error("STT invoke failed:", err);
118+
this.stop();
119+
}
111120
}
112121
}
113122

@@ -140,8 +149,8 @@ export class SttClient {
140149

141150
private createHubConnection() {
142151
const hubConnection = new signalR.HubConnectionBuilder()
143-
.withUrl(this.hubUrl, { accessTokenFactory: () => this.token })
144-
.configureLogging(signalR.LogLevel.Information)
152+
.withUrl(this.hubUrl)
153+
.configureLogging(signalR.LogLevel.Warning)
145154
.build();
146155

147156
return hubConnection;
@@ -162,30 +171,4 @@ export class SttClient {
162171
this.isAutoFinished = false;
163172
}
164173
}
165-
166-
private restartGracePeriod() {
167-
if (this.silenceGraceTimeout) {
168-
clearTimeout(this.silenceGraceTimeout);
169-
}
170-
171-
this.silenceGraceTimeout = setTimeout(() => {
172-
this.isCountdownRunning = true;
173-
this.onStartCountdown(SILENCE_TIMEOUT_MS - SILENCE_GRACE_PERIOD);
174-
}, SILENCE_GRACE_PERIOD);
175-
}
176-
177-
private resetSilenceTimer() {
178-
this.clearSilenceTimer();
179-
this.silenceTimeout = setTimeout(() => {
180-
this.isAutoFinished = true;
181-
this.stop();
182-
}, SILENCE_TIMEOUT_MS);
183-
}
184-
185-
private clearSilenceTimer() {
186-
if (this.silenceTimeout) {
187-
clearTimeout(this.silenceTimeout);
188-
this.silenceTimeout = null;
189-
}
190-
}
191174
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/** Contract implemented by every speech-to-text client used by the chat input. */
export interface ISttClient {
  /** Start recording and transcribing */
  start(language?: string): Promise<void>;

  /** Stop recording/transcribing */
  stop(): void;

  /** Whether recording is currently active */
  readonly isRecording: boolean;

  /** Signal emitted when audio activity is detected */
  readonly onPulseSignal: () => void;

  /** Called when the countdown should start or stop (pass null to stop) */
  readonly onStartCountdown: (ms: number | null) => void;

  /** Called when a partial or final transcript is produced */
  readonly onTranscript: (text: string) => void;

  /** Called when stop is in progress (manual or automatic) */
  readonly onStopInProgress: () => void;

  /** Called when transcription fully finishes */
  readonly onFinishedTranscribing: (finish: 'auto' | 'manual') => void;
}

/**
 * Shared silence-detection plumbing for speech-to-text clients.
 *
 * Two timers are managed here:
 * - the silence timer, which calls `stop()` after `SILENCE_TIMEOUT_MS` and
 *   marks the session as automatically finished;
 * - the grace timer, which after `SILENCE_GRACE_PERIOD` asks the UI to start a
 *   countdown for the remaining time.
 *
 * Both timer callbacks are ignored once `isRecording` is `false`, so a timer
 * that outlives its session cannot leak state into the next one.
 */
export abstract class BaseSttClient implements ISttClient {
  protected static readonly SILENCE_TIMEOUT_MS = 4000;
  protected static readonly SILENCE_GRACE_PERIOD = 1000;

  // Timer handles are kept loosely typed on purpose: subclasses outside this
  // file read and assign them directly.
  protected silenceTimeout: any = null;
  protected silenceGraceTimeout: any = null;
  protected isCountdownRunning = false;
  protected isAutoFinished = false;

  isRecording = false;

  constructor(
    public onPulseSignal: () => void,
    public onStartCountdown: (ms: number | null) => void,
    public onTranscript: (text: string) => void,
    public onStopInProgress: () => void,
    public onFinishedTranscribing: (finish: 'auto' | 'manual') => void
  ) {}

  abstract start(language?: string): Promise<void>;
  abstract stop(): void;

  /** Clears the silence timeout */
  protected clearSilenceTimer() {
    if (this.silenceTimeout) {
      clearTimeout(this.silenceTimeout);
      this.silenceTimeout = null;
    }
  }

  /** Clears the pending grace-period timeout, if any */
  protected clearGracePeriod() {
    if (this.silenceGraceTimeout) {
      clearTimeout(this.silenceGraceTimeout);
      this.silenceGraceTimeout = null;
    }
  }

  /** Resets silence timer and auto-stop trigger */
  protected resetSilenceTimer() {
    this.clearSilenceTimer();
    this.silenceTimeout = setTimeout(() => {
      this.silenceTimeout = null;

      // A timer that fires after the session ended must not flag the *next*
      // session as auto-finished.
      if (!this.isRecording) {
        return;
      }

      this.isAutoFinished = true;
      this.stop();
    }, BaseSttClient.SILENCE_TIMEOUT_MS);
  }

  /** Restarts grace period before countdown starts */
  protected restartGracePeriod() {
    this.clearGracePeriod();

    this.silenceGraceTimeout = setTimeout(() => {
      this.silenceGraceTimeout = null;

      // Do not start a countdown for a session that is no longer recording.
      if (!this.isRecording) {
        return;
      }

      this.isCountdownRunning = true;
      this.onStartCountdown(
        BaseSttClient.SILENCE_TIMEOUT_MS - BaseSttClient.SILENCE_GRACE_PERIOD
      );
    }, BaseSttClient.SILENCE_GRACE_PERIOD);
  }
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import { BaseSttClient } from './stt-client-base.js';
2+
3+
/** Minimal structural view of the `results` list carried by a recognition result event. */
type RecognitionResults = ArrayLike<ArrayLike<{ readonly transcript: string }>>;

/**
 * Speech-to-text client backed by the browser Web Speech API
 * (`SpeechRecognition` / `webkitSpeechRecognition`).
 *
 * Recognition runs in continuous mode with interim results. Every result event
 * pushes the concatenated transcript to `onTranscript` and re-arms the
 * inherited silence and grace timers.
 */
export class WebSpeechSttClient extends BaseSttClient {
  private recognition?: SpeechRecognition;

  /**
   * Starts a recognition session. Does nothing if one is already active.
   *
   * @param language Language tag assigned to `recognition.lang`.
   * @throws Error when the browser exposes no speech recognition constructor.
   */
  async start(language = 'en-US'): Promise<void> {
    if (this.isRecording) {
      return;
    }

    this.onTranscript('');

    const SpeechRecognitionCtor =
      window.SpeechRecognition || (window as any).webkitSpeechRecognition;
    if (!SpeechRecognitionCtor) {
      throw new Error('Web Speech API not supported in this browser.');
    }

    const recognition: SpeechRecognition = new SpeechRecognitionCtor();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = language;

    recognition.onresult = (event: { results: RecognitionResults }) => {
      this.handleTranscriptEvent(event);
    };

    recognition.onerror = () => {
      // Nothing to do here: the `end` handler below finishes the session
      // whenever the browser stops recognition on its own.
    };

    recognition.onend = () => {
      // The browser ended recognition without a call to `stop()` (error,
      // denied permission, engine timeout). Finish the session right away
      // instead of showing a recording state while nothing is captured.
      if (this.recognition === recognition && this.isRecording) {
        this.isAutoFinished = true;
        this.stop();
      }
    };

    this.recognition = recognition;
    recognition.start();

    this.isRecording = true;
    this.resetSilenceTimer();
    this.restartGracePeriod();
  }

  /**
   * Stops the active session, cancels pending timers and reports how the
   * session ended through `onFinishedTranscribing`. Does nothing when no
   * session is active.
   */
  stop(): void {
    if (!this.isRecording) {
      return;
    }

    this.isRecording = false;

    // Cancel both timers so they cannot fire into a later session.
    this.clearSilenceTimer();
    if (this.silenceGraceTimeout) {
      clearTimeout(this.silenceGraceTimeout);
      this.silenceGraceTimeout = null;
    }
    this.isCountdownRunning = false;

    this.recognition?.stop();

    const finish = this.isAutoFinished ? 'auto' : 'manual';
    this.isAutoFinished = false;
    this.onFinishedTranscribing(finish);
  }

  /** Forwards the full transcript of a result event and re-arms silence tracking. */
  private handleTranscriptEvent(event: { results: RecognitionResults }) {
    const transcript = Array.from(
      event.results,
      (result) => result[0].transcript
    ).join('');
    this.onTranscript(transcript);

    // A result may still arrive after stop(); deliver its text but do not
    // restart timers or signal activity for a finished session.
    if (!this.isRecording) {
      return;
    }

    this.resetSilenceTimer();
    this.onPulseSignal();
    if (this.isCountdownRunning) {
      this.onStartCountdown(null);
      this.isCountdownRunning = false;
    }
    this.restartGracePeriod();
  }
}

src/components/chat/types.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,13 @@ export type IgcChatOptions = {
165165
* Configuration options for enabling and customizing speech-to-text functionality.
166166
* If provided, it enables a button in the chat input area that allows users to dictate messages using their voice.
167167
*/
168-
sttOptions?: SpeechToTextOptions;
168+
speechToText?: SpeechToTextOptions;
169169
};
170170

171171
/** Settings for the chat input's speech-to-text (dictation) feature. */
export interface SpeechToTextOptions {
172172
/** Turns the speech-to-text button in the chat input on or off. */
enable: boolean;
173173
/** Language passed to the speech-to-text client's `start()` call, e.g. `'en-US'`. */
lang?: string;
174+
/** Which client performs transcription: the browser Web Speech API or a backend service. */
serviceProvider: 'webspeech' | 'backend';
174175
/** Endpoint handed to the backend client; the `'backend'` provider is only started when this is set. */
serviceUri?: string;
175176
}
176177
/**

stories/chat.stories.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,10 @@ const chat_options: IgcChatOptions = {
145145
inputPlaceholder: 'Type your message here...',
146146
speakPlaceholder: 'Speak...',
147147
headerText: 'Chat',
148-
sttOptions: {
148+
speechToText: {
149149
enable: true,
150150
lang: 'en-US',
151+
serviceProvider: 'webspeech', // 'webspeech' | 'backend'
151152
serviceUri: 'https://localhost:5000/sttHub',
152153
},
153154
renderers: {

0 commit comments

Comments
 (0)