Skip to content

Commit 6435e38

Browse files
authored
Merge pull request microsoft#201843 from microsoft/merogge/speech-editor-terminal
add start/stop speech to text in terminal
2 parents 0b571a2 + 74f9854 commit 6435e38

File tree

7 files changed

+288
-4
lines changed

7 files changed

+288
-4
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License. See License.txt in the project root for license information.
4+
*--------------------------------------------------------------------------------------------*/
5+
6+
/* Base box for elements carrying the `terminal-speech-to-text` class: terminal-coloured
   (falling back to the panel background), rounded, flex-centred, single-line, and stacked
   at z-index 1000. */
.terminal-speech-to-text {
7+
background-color: var(--vscode-terminal-background, var(--vscode-panel-background));
8+
padding: 2px;
9+
border-radius: 8px;
10+
display: flex;
11+
align-items: center;
12+
white-space: nowrap;
13+
z-index: 1000;
14+
}
15+
16+
/* When the same element is also the filled-microphone codicon, pin it to a 16x16 box. */
.terminal-speech-to-text.codicon.codicon-mic-filled {
17+
display: flex;
18+
align-items: center;
19+
width: 16px;
20+
height: 16px;
21+
}
22+
23+
/* While the `recording` class is present, tint the microphone and run the
   `ani-terminal-speech` animation on an endless 1s cycle. */
.terminal-speech-to-text.recording.codicon.codicon-mic-filled {
24+
color: var(--vscode-activityBarBadge-background);
25+
animation: ani-terminal-speech 1s infinite;
26+
}
27+
28+
/* Colour pulse: starts and ends on the terminal cursor background colour and passes
   through the activity bar badge colour at the midpoint. */
@keyframes ani-terminal-speech {
29+
0% {
30+
color: var(--vscode-terminalCursor-background);
31+
}
32+
33+
50% {
34+
color: var(--vscode-activityBarBadge-background);
35+
}
36+
37+
100% {
38+
color: var(--vscode-terminalCursor-background);
39+
}
40+
}
41+
42+
/* In-progress transcription text: italic, drawn with the editor ghost-text foreground
   (forced with !important) and ghost-text border colours, stacked at z-index 1000. */
.terminal-speech-progress-text {
43+
font-style: italic;
44+
color: var(--vscode-editorGhostText-foreground) !important;
45+
border: 1px solid var(--vscode-editorGhostText-border);
46+
z-index: 1000;
47+
}

src/vs/workbench/contrib/terminal/browser/terminal.contribution.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import 'vs/css!./media/scrollbar';
88
import 'vs/css!./media/widgets';
99
import 'vs/css!./media/xterm';
1010
import 'vs/css!./media/terminal';
11+
import 'vs/css!./media/terminalSpeechToText';
1112
import * as nls from 'vs/nls';
1213
import { URI } from 'vs/base/common/uri';
1314
import { CommandsRegistry } from 'vs/platform/commands/common/commands';

src/vs/workbench/contrib/terminal/browser/terminal.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -789,8 +789,9 @@ export interface ITerminalInstance extends IBaseTerminalInstance {
789789

790790
/**
791791
* Registers and returns a marker
792+
* @param offset The y offset from the cursor
792793
*/
793-
registerMarker(): IMarker | undefined;
794+
registerMarker(offset?: number): IMarker | undefined;
794795

795796
/**
796797
* Adds a marker to the buffer, mapping it to an ID if provided.

src/vs/workbench/contrib/terminal/browser/terminalActions.ts

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import { Action2, registerAction2, IAction2Options, MenuId } from 'vs/platform/a
2020
import { ICommandService } from 'vs/platform/commands/common/commands';
2121
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
2222
import { ContextKeyExpr } from 'vs/platform/contextkey/common/contextkey';
23-
import { ServicesAccessor } from 'vs/platform/instantiation/common/instantiation';
23+
import { IInstantiationService, ServicesAccessor } from 'vs/platform/instantiation/common/instantiation';
2424
import { KeybindingWeight } from 'vs/platform/keybinding/common/keybindingsRegistry';
2525
import { ILabelService } from 'vs/platform/label/common/label';
2626
import { IListService } from 'vs/platform/list/browser/listService';
@@ -64,6 +64,8 @@ import { AccessibleViewProviderId, accessibleViewCurrentProviderId, accessibleVi
6464
import { isKeyboardEvent, isMouseEvent, isPointerEvent } from 'vs/base/browser/dom';
6565
import { editorGroupToColumn } from 'vs/workbench/services/editor/common/editorGroupColumn';
6666
import { InstanceContext } from 'vs/workbench/contrib/terminal/browser/terminalContextMenu';
67+
import { TerminalSpeechToTextSession } from 'vs/workbench/contrib/terminal/browser/terminalSpeechToText';
68+
import { HasSpeechProvider } from 'vs/workbench/contrib/speech/common/speechService';
6769

6870
export const switchTerminalActionViewItemSeparator = '\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500';
6971
export const switchTerminalShowTabsTitle = localize('showTerminalTabs', "Show Tabs");
@@ -1641,6 +1643,34 @@ export function registerTerminalActions() {
16411643
}
16421644
}
16431645
});
1646+
1647+
registerActiveInstanceAction({
1648+
id: TerminalCommandId.StartSpeechToText,
1649+
title: {
1650+
value: localize('workbench.action.startTerminalSpeechToText', "Start Terminal Speech To Text"),
1651+
original: 'Start Terminal Speech To Text'
1652+
},
1653+
precondition: ContextKeyExpr.and(HasSpeechProvider, sharedWhenClause.terminalAvailable),
1654+
f1: true,
1655+
run: (activeInstance, c, accessor) => {
1656+
const instantiationService = accessor.get(IInstantiationService);
1657+
TerminalSpeechToTextSession.getInstance(instantiationService).start();
1658+
}
1659+
});
1660+
1661+
registerActiveInstanceAction({
1662+
id: TerminalCommandId.StopSpeechToText,
1663+
title: {
1664+
value: localize('workbench.action.stopTerminalSpeechToText', "Stop Terminal Speech To Text"),
1665+
original: 'Stop Terminal Speech To Text'
1666+
},
1667+
precondition: ContextKeyExpr.and(HasSpeechProvider, sharedWhenClause.terminalAvailable),
1668+
f1: true,
1669+
run: (activeInstance, c, accessor) => {
1670+
const instantiationService = accessor.get(IInstantiationService);
1671+
TerminalSpeechToTextSession.getInstance(instantiationService).stop(true);
1672+
}
1673+
});
16441674
}
16451675

16461676
interface IRemoteTerminalPick extends IQuickPickItem {

src/vs/workbench/contrib/terminal/browser/terminalInstance.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1440,8 +1440,8 @@ export class TerminalInstance extends Disposable implements ITerminalInstance {
14401440
}
14411441
}
14421442

1443-
public registerMarker(): IMarker | undefined {
1444-
return this.xterm?.raw.registerMarker();
1443+
public registerMarker(offset?: number): IMarker | undefined {
1444+
return this.xterm?.raw.registerMarker(offset);
14451445
}
14461446

14471447
public addBufferMarker(properties: IMarkProperties): void {
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License. See License.txt in the project root for license information.
4+
*--------------------------------------------------------------------------------------------*/
5+
6+
import { RunOnceScheduler } from 'vs/base/common/async';
7+
import { CancellationTokenSource } from 'vs/base/common/cancellation';
8+
import { Disposable, DisposableStore } from 'vs/base/common/lifecycle';
9+
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
10+
import { IInstantiationService } from 'vs/platform/instantiation/common/instantiation';
11+
import { AccessibilityVoiceSettingId, SpeechTimeoutDefault } from 'vs/workbench/contrib/accessibility/browser/accessibilityConfiguration';
12+
import { ISpeechService, ISpeechToTextEvent, SpeechToTextStatus } from 'vs/workbench/contrib/speech/common/speechService';
13+
import { ITerminalService } from 'vs/workbench/contrib/terminal/browser/terminal';
14+
import { isNumber } from 'vs/base/common/types';
15+
import type { IDecoration } from '@xterm/xterm';
16+
import { IXtermMarker } from 'vs/platform/terminal/common/capabilities/capabilities';
17+
import { ThemeIcon } from 'vs/base/common/themables';
18+
import { Codicon } from 'vs/base/common/codicons';
19+
20+
/**
 * Maps a spoken symbol name (as it appears in a transcription) to the literal
 * character that should be typed in its place.
 *
 * Entry order matters: consumers walk this map in insertion order and replace
 * each name in turn. Multi-word phrases must therefore come before any entry
 * that is a sub-phrase of them; otherwise "double quote" would first be
 * rewritten to "double '" by the `quote` entry and never match.
 */
const symbolMap: { [key: string]: string } = {
	// Multi-word phrases first so their shorter suffixes cannot pre-empt them.
	'double quote': '"',
	'Double quote': '"',
	'Ampersand': '&',
	'ampersand': '&',
	'Dollar': '$',
	'dollar': '$',
	'Percent': '%',
	'percent': '%',
	'Asterisk': '*',
	'asterisk': '*',
	'Plus': '+',
	'plus': '+',
	'Equals': '=',
	'equals': '=',
	'Exclamation': '!',
	'exclamation': '!',
	'Slash': '/',
	'slash': '/',
	'Backslash': '\\',
	'backslash': '\\',
	'Dot': '.',
	'dot': '.',
	'Period': '.',
	'period': '.',
	'Quote': '\'',
	'quote': '\'',
};
48+
49+
/**
 * Runs speech-to-text dictation against the active terminal.
 *
 * A session listens to the speech service, shows a microphone decoration at
 * the cursor while recording, previews the in-progress transcription as ghost
 * text, and finally sends the recognized text to the active terminal without
 * executing it. One shared instance is handed out by {@link getInstance}.
 */
export class TerminalSpeechToTextSession extends Disposable {
	/** The text that will be sent to the terminal when the transcription is accepted. */
	private _input: string = '';
	/** Decoration showing the in-progress transcription next to the cursor. */
	private _ghostText: IDecoration | undefined;
	/** Decoration showing the microphone icon. */
	private _decoration: IDecoration | undefined;
	/** Marker anchoring the microphone decoration. */
	private _marker: IXtermMarker | undefined;
	/** Marker anchoring the ghost text decoration. */
	private _ghostTextMarker: IXtermMarker | undefined;
	private static _instance: TerminalSpeechToTextSession | undefined = undefined;
	/** Sends the recognized text once no further speech has arrived within the configured timeout. */
	private _acceptTranscriptionScheduler: RunOnceScheduler | undefined;

	/**
	 * Returns the shared session, creating it on first use.
	 * @param instantiationService Used to construct the session the first time it is requested.
	 */
	static getInstance(instantiationService: IInstantiationService): TerminalSpeechToTextSession {
		if (!TerminalSpeechToTextSession._instance) {
			TerminalSpeechToTextSession._instance = instantiationService.createInstance(TerminalSpeechToTextSession);
		}

		return TerminalSpeechToTextSession._instance;
	}

	private _cancellationTokenSource: CancellationTokenSource | undefined;
	/** Holds everything that belongs to one start/stop cycle; cleared by {@link stop}. */
	private readonly _disposables: DisposableStore;

	constructor(
		@ISpeechService private readonly _speechService: ISpeechService,
		@ITerminalService readonly _terminalService: ITerminalService,
		@IConfigurationService readonly configurationService: IConfigurationService,
		@IInstantiationService readonly _instantationService: IInstantiationService
	) {
		super();
		// Dictation is tied to one terminal: abandon it when that terminal loses focus or goes away.
		this._register(this._terminalService.onDidChangeActiveInstance(() => this.stop()));
		this._register(this._terminalService.onDidDisposeInstance(() => this.stop()));
		this._disposables = this._register(new DisposableStore());
	}

	/**
	 * Starts a new dictation, discarding any dictation already in progress.
	 */
	start(): void {
		this.stop();
		let voiceTimeout = this.configurationService.getValue<number>(AccessibilityVoiceSettingId.SpeechTimeout);
		if (!isNumber(voiceTimeout) || voiceTimeout < 0) {
			voiceTimeout = SpeechTimeoutDefault;
		}
		const scheduler = this._disposables.add(new RunOnceScheduler(() => {
			this._terminalService.activeInstance?.sendText(this._input, false);
			this.stop();
		}, voiceTimeout));
		this._acceptTranscriptionScheduler = scheduler;
		// Owned by the per-cycle store so that repeated start() calls do not pile
		// token sources up on the instance-lifetime store.
		const cancellation = this._disposables.add(new CancellationTokenSource());
		this._cancellationTokenSource = cancellation;
		const session = this._disposables.add(this._speechService.createSpeechToTextSession(cancellation.token));

		this._disposables.add(session.onDidChange((e) => {
			if (cancellation.token.isCancellationRequested) {
				return;
			}
			switch (e.status) {
				case SpeechToTextStatus.Started:
					// TODO: play start audio cue
					if (!this._decoration) {
						this._createDecoration();
					}
					break;
				case SpeechToTextStatus.Recognizing: {
					this._updateInput(e);
					this._renderGhostText(e);
					// The user is still speaking: hold off accepting the transcription.
					if (voiceTimeout > 0) {
						scheduler.cancel();
					}
					break;
				}
				case SpeechToTextStatus.Recognized:
					this._updateInput(e);
					// A timeout of 0 means the text is only sent on an explicit stop.
					if (voiceTimeout > 0) {
						scheduler.schedule();
					}
					break;
				case SpeechToTextStatus.Stopped:
					// TODO: play stop audio cue
					this.stop();
					break;
			}
		}));
	}

	/**
	 * Ends the current dictation and removes its decorations. Safe to call when
	 * no dictation is running.
	 * @param send When true, the text recognized so far is sent to the active terminal before cleanup.
	 */
	stop(send?: boolean): void {
		this._setInactive();
		if (send) {
			// The scheduler only exists after start(); stop(true) may be invoked before that.
			this._acceptTranscriptionScheduler?.cancel();
			this._terminalService.activeInstance?.sendText(this._input, false);
		}
		this._marker?.dispose();
		this._marker = undefined;
		this._ghostTextMarker?.dispose();
		this._ghostTextMarker = undefined;
		this._ghostText?.dispose();
		this._ghostText = undefined;
		this._decoration?.dispose();
		this._decoration = undefined;
		this._cancellationTokenSource?.cancel();
		this._cancellationTokenSource = undefined;
		this._acceptTranscriptionScheduler = undefined;
		this._disposables.clear();
		this._input = '';
	}

	/**
	 * Converts a transcription into terminal input: strips the punctuation
	 * `. , ? ; !`, then swaps every spoken symbol name found in `symbolMap` for
	 * its character. Events without text leave the pending input untouched.
	 */
	private _updateInput(e: ISpeechToTextEvent): void {
		if (e.text) {
			let input = e.text.replaceAll(/[.,?;!]/g, '');
			for (const [spokenName, symbol] of Object.entries(symbolMap)) {
				// Global so every occurrence is replaced; a replacer function so
				// symbols such as `$` are inserted literally.
				input = input.replace(new RegExp('\\b' + spokenName + '\\b', 'g'), () => symbol);
			}
			this._input = ' ' + input;
		}
	}

	/**
	 * Places the recording microphone icon at the cursor column of the active
	 * terminal, anchored to a marker registered with a y offset of -1 from the
	 * cursor. Does nothing when there is no active xterm.
	 */
	private _createDecoration(): void {
		const activeInstance = this._terminalService.activeInstance;
		const xterm = activeInstance?.xterm?.raw;
		if (!xterm) {
			return;
		}
		this._marker = activeInstance.registerMarker(-1);
		if (!this._marker) {
			return;
		}
		this._decoration = xterm.registerDecoration({
			marker: this._marker,
			layer: 'top',
			x: xterm.buffer.active.cursorX ?? 0,
		});
		this._decoration?.onRender((element: HTMLElement) => {
			element.classList.add(...ThemeIcon.asClassNameArray(Codicon.micFilled), 'terminal-speech-to-text', 'recording');
			element.style.transform = 'translate(-5px, -5px)';
		});
	}

	/** Drops the `recording` class from the microphone decoration. */
	private _setInactive(): void {
		this._decoration?.element?.classList.remove('recording');
	}

	/**
	 * Replaces the ghost text preview with the text of the given event, drawn
	 * one column to the right of the cursor. The previous preview and its
	 * marker are always released first.
	 */
	private _renderGhostText(e: ISpeechToTextEvent): void {
		this._ghostText?.dispose();
		this._ghostText = undefined;
		// Each preview gets its own marker; release the old one so markers do not
		// accumulate across Recognizing events.
		this._ghostTextMarker?.dispose();
		this._ghostTextMarker = undefined;
		const text = e.text;
		if (!text) {
			return;
		}
		const activeInstance = this._terminalService.activeInstance;
		const xterm = activeInstance?.xterm?.raw;
		if (!xterm) {
			return;
		}
		this._ghostTextMarker = activeInstance.registerMarker();
		if (!this._ghostTextMarker) {
			return;
		}
		this._ghostText = xterm.registerDecoration({
			marker: this._ghostTextMarker,
			layer: 'top',
			// `+` binds tighter than `??`, so the former `cursorX + 1 ?? 0` never used its fallback.
			x: xterm.buffer.active.cursorX + 1,
		});
		this._ghostText?.onRender((element: HTMLElement) => {
			element.classList.add('terminal-speech-progress-text');
			element.textContent = text;
			element.style.width = 'fit-content';
		});
	}
}
202+
203+

src/vs/workbench/contrib/terminal/common/terminal.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,8 @@ export const enum TerminalCommandId {
495495
FocusHover = 'workbench.action.terminal.focusHover',
496496
ShowEnvironmentContributions = 'workbench.action.terminal.showEnvironmentContributions',
497497
ToggleStickyScroll = 'workbench.action.terminal.toggleStickyScroll',
498+
StartSpeechToText = 'workbench.action.startTerminalSpeechToText',
499+
StopSpeechToText = 'workbench.action.stopTerminalSpeechToText',
498500

499501
// Developer commands
500502

0 commit comments

Comments
 (0)