Skip to content

Commit f16a49d

Browse files
heyseth and mrubens authored
Add text-to-speech functionality (#1412)
* Add text-to-speech functionality
* Add speed config option to text-to-speech
* Fix test case for tts speed slider
* Fix test case for tts speed slider (really)
* Disabled error message logging in tts.ts
* ignore markdown and mermaid diagrams in TTS
* add ttsEnabled and ttsSpeed to GlobalStateKey
* fix failing webview test for save button
* Translations
* Fix tests

---------

Co-authored-by: Matt Rubens <[email protected]>
1 parent eb74f02 commit f16a49d

32 files changed

+392
-14
lines changed

package-lock.json

Lines changed: 17 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@
359359
"puppeteer-chromium-resolver": "^23.0.0",
360360
"puppeteer-core": "^23.4.0",
361361
"reconnecting-eventsource": "^1.6.4",
362+
"say": "^0.16.0",
362363
"serialize-error": "^11.0.3",
363364
"simple-git": "^3.27.0",
364365
"sound-play": "^1.1.0",

src/core/webview/ClineProvider.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import { BrowserSession } from "../../services/browser/BrowserSession"
3939
import { discoverChromeInstances } from "../../services/browser/browserDiscovery"
4040
import { fileExistsAtPath } from "../../utils/fs"
4141
import { playSound, setSoundEnabled, setSoundVolume } from "../../utils/sound"
42+
import { playTts, setTtsEnabled, setTtsSpeed } from "../../utils/tts"
4243
import { singleCompletionHandler } from "../../utils/single-completion-handler"
4344
import { searchCommits } from "../../utils/git"
4445
import { getDiffStrategy } from "../diff/DiffStrategy"
@@ -356,6 +357,11 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
356357
setSoundEnabled(soundEnabled ?? false)
357358
})
358359

360+
// Initialize tts enabled state
361+
this.getState().then(({ ttsEnabled }) => {
362+
setTtsEnabled(ttsEnabled ?? false)
363+
})
364+
359365
webviewView.webview.options = {
360366
// Allow scripts in the webview
361367
enableScripts: true,
@@ -1233,6 +1239,23 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
12331239
setSoundVolume(soundVolume)
12341240
await this.postStateToWebview()
12351241
break
1242+
case "ttsEnabled":
1243+
const ttsEnabled = message.bool ?? true
1244+
await this.updateGlobalState("ttsEnabled", ttsEnabled)
1245+
setTtsEnabled(ttsEnabled) // Add this line to update the tts utility
1246+
await this.postStateToWebview()
1247+
break
1248+
case "ttsSpeed":
1249+
const ttsSpeed = message.value ?? 1.0
1250+
await this.updateGlobalState("ttsSpeed", ttsSpeed)
1251+
setTtsSpeed(ttsSpeed)
1252+
await this.postStateToWebview()
1253+
break
1254+
case "playTts":
1255+
if (message.text) {
1256+
playTts(message.text)
1257+
}
1258+
break
12361259
case "diffEnabled":
12371260
const diffEnabled = message.bool ?? true
12381261
await this.updateGlobalState("diffEnabled", diffEnabled)
@@ -2333,6 +2356,8 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
23332356
alwaysAllowModeSwitch,
23342357
alwaysAllowSubtasks,
23352358
soundEnabled,
2359+
ttsEnabled,
2360+
ttsSpeed,
23362361
diffEnabled,
23372362
enableCheckpoints,
23382363
checkpointStorage,
@@ -2392,6 +2417,8 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
23922417
.filter((item: HistoryItem) => item.ts && item.task)
23932418
.sort((a: HistoryItem, b: HistoryItem) => b.ts - a.ts),
23942419
soundEnabled: soundEnabled ?? false,
2420+
ttsEnabled: ttsEnabled ?? false,
2421+
ttsSpeed: ttsSpeed ?? 1.0,
23952422
diffEnabled: diffEnabled ?? true,
23962423
enableCheckpoints: enableCheckpoints ?? true,
23972424
checkpointStorage: checkpointStorage ?? "task",
@@ -2551,6 +2578,8 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
25512578
taskHistory: stateValues.taskHistory,
25522579
allowedCommands: stateValues.allowedCommands,
25532580
soundEnabled: stateValues.soundEnabled ?? false,
2581+
ttsEnabled: stateValues.ttsEnabled ?? false,
2582+
ttsSpeed: stateValues.ttsSpeed ?? 1.0,
25542583
diffEnabled: stateValues.diffEnabled ?? true,
25552584
enableCheckpoints: stateValues.enableCheckpoints ?? true,
25562585
checkpointStorage: stateValues.checkpointStorage ?? "task",

src/core/webview/__tests__/ClineProvider.test.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { ClineProvider } from "../ClineProvider"
77
import { ExtensionMessage, ExtensionState } from "../../../shared/ExtensionMessage"
88
import { GlobalStateKey, SecretKey } from "../../../shared/globalState"
99
import { setSoundEnabled } from "../../../utils/sound"
10+
import { setTtsEnabled } from "../../../utils/tts"
1011
import { defaultModeSlug } from "../../../shared/modes"
1112
import { experimentDefault } from "../../../shared/experiments"
1213
import { Cline } from "../../Cline"
@@ -271,6 +272,11 @@ jest.mock("../../../utils/sound", () => ({
271272
setSoundEnabled: jest.fn(),
272273
}))
273274

275+
// Mock tts utility
276+
jest.mock("../../../utils/tts", () => ({
277+
setTtsEnabled: jest.fn(),
278+
}))
279+
274280
// Mock ESM modules
275281
jest.mock("p-wait-for", () => ({
276282
__esModule: true,
@@ -506,6 +512,7 @@ describe("ClineProvider", () => {
506512
alwaysAllowMcp: false,
507513
uriScheme: "vscode",
508514
soundEnabled: false,
515+
ttsEnabled: false,
509516
diffEnabled: false,
510517
enableCheckpoints: false,
511518
checkpointStorage: "task",
@@ -603,6 +610,7 @@ describe("ClineProvider", () => {
603610
expect(state).toHaveProperty("alwaysAllowBrowser")
604611
expect(state).toHaveProperty("taskHistory")
605612
expect(state).toHaveProperty("soundEnabled")
613+
expect(state).toHaveProperty("ttsEnabled")
606614
expect(state).toHaveProperty("diffEnabled")
607615
expect(state).toHaveProperty("writeDelayMs")
608616
})
@@ -666,6 +674,18 @@ describe("ClineProvider", () => {
666674
expect(setSoundEnabled).toHaveBeenCalledWith(false)
667675
expect(mockContext.globalState.update).toHaveBeenCalledWith("soundEnabled", false)
668676
expect(mockPostMessage).toHaveBeenCalled()
677+
678+
// Simulate setting tts to enabled
679+
await messageHandler({ type: "ttsEnabled", bool: true })
680+
expect(setTtsEnabled).toHaveBeenCalledWith(true)
681+
expect(mockContext.globalState.update).toHaveBeenCalledWith("ttsEnabled", true)
682+
expect(mockPostMessage).toHaveBeenCalled()
683+
684+
// Simulate setting tts to disabled
685+
await messageHandler({ type: "ttsEnabled", bool: false })
686+
expect(setTtsEnabled).toHaveBeenCalledWith(false)
687+
expect(mockContext.globalState.update).toHaveBeenCalledWith("ttsEnabled", false)
688+
expect(mockPostMessage).toHaveBeenCalled()
669689
})
670690

671691
test("requestDelaySeconds defaults to 10 seconds", async () => {

src/exports/roo-code.d.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,8 @@ export type GlobalStateKey =
207207
| "openRouterUseMiddleOutTransform"
208208
| "googleGeminiBaseUrl"
209209
| "allowedCommands"
210+
| "ttsEnabled"
211+
| "ttsSpeed"
210212
| "soundEnabled"
211213
| "soundVolume"
212214
| "diffEnabled"

src/shared/ExtensionMessage.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ export interface ExtensionState {
124124
currentTaskItem?: HistoryItem
125125
allowedCommands?: string[]
126126
soundEnabled?: boolean
127+
ttsEnabled?: boolean
128+
ttsSpeed?: number
127129
soundVolume?: number
128130
diffEnabled?: boolean
129131
enableCheckpoints: boolean

src/shared/WebviewMessage.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,10 @@ export interface WebviewMessage {
5050
| "alwaysAllowModeSwitch"
5151
| "alwaysAllowSubtasks"
5252
| "playSound"
53+
| "playTts"
5354
| "soundEnabled"
55+
| "ttsEnabled"
56+
| "ttsSpeed"
5457
| "soundVolume"
5558
| "diffEnabled"
5659
| "enableCheckpoints"

src/shared/globalState.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ export const GLOBAL_STATE_KEYS = [
7676
"googleGeminiBaseUrl",
7777
"allowedCommands",
7878
"soundEnabled",
79+
"ttsEnabled",
80+
"ttsSpeed",
7981
"soundVolume",
8082
"diffEnabled",
8183
"enableCheckpoints",

src/utils/tts.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import * as vscode from "vscode"
2+
3+
// Whether text-to-speech is currently enabled; written by setTtsEnabled and
// checked by playTts and processQueue before anything is queued or spoken.
let isTtsEnabled = false
4+
// Speed value handed to say.speak for every utterance; written by setTtsSpeed.
let speed = 1.0
5+
// True while processQueue is speaking an utterance, so that a second
// processQueue call returns early instead of speaking over the first.
let isSpeaking = false
6+
// Messages waiting to be spoken: playTts appends to the end, processQueue
// removes from the front.
const utteranceQueue: string[] = []
7+
8+
/**
 * Enable or disable text-to-speech.
 *
 * Disabling also discards every utterance still waiting in the queue.
 * Without this, messages queued before the switch was turned off would be
 * spoken, out of context, the next time TTS is enabled and a new message
 * arrives.
 *
 * @param enabled true to allow messages to be queued and spoken, false to mute
 */
export const setTtsEnabled = (enabled: boolean): void => {
	isTtsEnabled = enabled

	if (!enabled) {
		// Truncate in place: utteranceQueue is a const binding shared with
		// the functions that push to and shift from it.
		utteranceQueue.length = 0
	}
}
15+
16+
/**
 * Set the speed used for utterances spoken from now on.
 *
 * Values that are not finite, positive numbers (NaN, Infinity, zero,
 * negatives) are ignored and the previously configured speed is kept, so an
 * invalid setting can never reach the speech engine.
 *
 * @param newSpeed speed value forwarded to say.speak (the initial value is 1.0)
 */
export const setTtsSpeed = (newSpeed: number): void => {
	if (!Number.isFinite(newSpeed) || newSpeed <= 0) {
		return
	}

	speed = newSpeed
}
23+
24+
/**
 * Speak queued utterances one at a time, in order, until the queue is empty
 * or text-to-speech is disabled.
 *
 * Returns immediately when TTS is disabled, when another invocation is
 * already speaking, or when nothing is queued. Errors never propagate to the
 * caller: an utterance that fails is logged and skipped so the remaining
 * utterances still play.
 */
const processQueue = async (): Promise<void> => {
	if (!isTtsEnabled || isSpeaking || utteranceQueue.length === 0) {
		return
	}

	isSpeaking = true

	try {
		// Loaded on first use rather than at module load, as in the original code.
		const say = require("say")

		// Drain with a loop instead of recursing once per utterance; the
		// enabled flag is re-checked before every utterance so disabling TTS
		// stops playback at the next utterance boundary.
		while (isTtsEnabled && utteranceQueue.length > 0) {
			const nextUtterance = utteranceQueue.shift()

			if (nextUtterance === undefined) {
				break
			}

			try {
				// say.speak reports completion through a callback; wrap it so
				// the next utterance starts only after this one has finished.
				await new Promise<void>((resolve, reject) => {
					say.speak(nextUtterance, null, speed, (err: unknown) => {
						if (err) {
							reject(err)
						} else {
							resolve()
						}
					})
				})
			} catch (error: unknown) {
				// Best effort: report the failure and continue with the next item.
				console.error("[tts] Failed to speak utterance:", error)
			}
		}
	} catch (error: unknown) {
		// The speech module itself could not be loaded. Nothing queued can be
		// spoken, so drop it rather than let the queue grow without bound.
		utteranceQueue.length = 0
		console.error("[tts] Text-to-speech is unavailable:", error)
	} finally {
		// Always release the guard so later calls can speak again.
		isSpeaking = false
	}
}
58+
59+
/**
 * Queue a message to be spoken and start processing the queue.
 *
 * Does nothing when text-to-speech is disabled or when the message contains
 * no speakable text (empty or whitespace only). Never rejects: a failure
 * while processing the queue is logged instead of being thrown to the caller.
 *
 * @param message text to speak
 * @returns a promise that resolves when this call's processQueue invocation returns
 */
export const playTts = async (message: string): Promise<void> => {
	if (!isTtsEnabled || message.trim().length === 0) {
		return
	}

	utteranceQueue.push(message)

	try {
		await processQueue()
	} catch (error: unknown) {
		console.error("[tts] Failed to process speech queue:", error)
	}
}

webview-ui/package-lock.json

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)