Skip to content

Commit 4112aa6

Browse files
committed
feat: integrate Google Cloud and Azure Text-to-Speech services
- Add TTS provider interface and implementations for native, Google Cloud, and Azure
- Create TtsManager to coordinate between different TTS providers
- Update UI to allow provider selection and configuration
- Add settings for API keys and provider-specific configuration
- Maintain backward compatibility with existing native TTS functionality

Fixes #6827
1 parent ad0e33e commit 4112aa6

File tree

17 files changed

+1582
-110
lines changed

17 files changed

+1582
-110
lines changed

packages/types/src/global-settings.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,12 @@ export const globalSettingsSchema = z.object({
9595

9696
ttsEnabled: z.boolean().optional(),
9797
ttsSpeed: z.number().optional(),
98+
ttsProvider: z.enum(["native", "google-cloud", "azure"]).optional(),
99+
ttsVoice: z.string().optional(),
100+
googleCloudTtsApiKey: z.string().optional(),
101+
googleCloudTtsProjectId: z.string().optional(),
102+
azureTtsSubscriptionKey: z.string().optional(),
103+
azureTtsRegion: z.string().optional(),
98104
soundEnabled: z.boolean().optional(),
99105
soundVolume: z.number().optional(),
100106

@@ -255,6 +261,12 @@ export const EVALS_SETTINGS: RooCodeSettings = {
255261

256262
ttsEnabled: false,
257263
ttsSpeed: 1,
264+
ttsProvider: "native",
265+
ttsVoice: undefined,
266+
googleCloudTtsApiKey: undefined,
267+
googleCloudTtsProjectId: undefined,
268+
azureTtsSubscriptionKey: undefined,
269+
azureTtsRegion: undefined,
258270
soundEnabled: false,
259271
soundVolume: 0.5,
260272

pnpm-lock.yaml

Lines changed: 377 additions & 15 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/core/webview/ClineProvider.ts

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ import type { IndexProgressUpdate } from "../../services/code-index/interfaces/m
6363
import { MdmService } from "../../services/mdm/MdmService"
6464

6565
import { fileExistsAtPath } from "../../utils/fs"
66-
import { setTtsEnabled, setTtsSpeed } from "../../utils/tts"
66+
import { setTtsEnabled, setTtsSpeed, initializeTts } from "../../utils/tts"
6767
import { getWorkspaceGitInfo } from "../../utils/git"
6868
import { getWorkspacePath } from "../../utils/path"
6969

@@ -532,13 +532,29 @@ export class ClineProvider
532532
},
533533
)
534534

535-
// Initialize tts enabled state
536-
this.getState().then(({ ttsEnabled }) => {
537-
setTtsEnabled(ttsEnabled ?? false)
538-
})
535+
// Initialize TTS with configuration
536+
this.getState().then(async (state) => {
537+
const {
538+
ttsEnabled,
539+
ttsSpeed,
540+
ttsProvider,
541+
googleCloudTtsApiKey,
542+
googleCloudTtsProjectId,
543+
azureTtsSubscriptionKey,
544+
azureTtsRegion,
545+
} = state
546+
547+
// Initialize TTS manager with provider configuration
548+
await initializeTts({
549+
provider: ttsProvider as "native" | "google-cloud" | "azure" | undefined,
550+
googleCloudApiKey: googleCloudTtsApiKey,
551+
googleCloudProjectId: googleCloudTtsProjectId,
552+
azureSubscriptionKey: azureTtsSubscriptionKey,
553+
azureRegion: azureTtsRegion,
554+
})
539555

540-
// Initialize tts speed state
541-
this.getState().then(({ ttsSpeed }) => {
556+
// Set enabled state and speed
557+
setTtsEnabled(ttsEnabled ?? false)
542558
setTtsSpeed(ttsSpeed ?? 1)
543559
})
544560

@@ -1567,6 +1583,12 @@ export class ClineProvider
15671583
soundEnabled,
15681584
ttsEnabled,
15691585
ttsSpeed,
1586+
ttsProvider,
1587+
ttsVoice,
1588+
googleCloudTtsApiKey,
1589+
googleCloudTtsProjectId,
1590+
azureTtsSubscriptionKey,
1591+
azureTtsRegion,
15701592
diffEnabled,
15711593
enableCheckpoints,
15721594
taskHistory,
@@ -1671,6 +1693,12 @@ export class ClineProvider
16711693
soundEnabled: soundEnabled ?? false,
16721694
ttsEnabled: ttsEnabled ?? false,
16731695
ttsSpeed: ttsSpeed ?? 1.0,
1696+
ttsProvider: ttsProvider ?? "native",
1697+
ttsVoice: ttsVoice ?? undefined,
1698+
googleCloudTtsApiKey: googleCloudTtsApiKey ?? undefined,
1699+
googleCloudTtsProjectId: googleCloudTtsProjectId ?? undefined,
1700+
azureTtsSubscriptionKey: azureTtsSubscriptionKey ?? undefined,
1701+
azureTtsRegion: azureTtsRegion ?? undefined,
16741702
diffEnabled: diffEnabled ?? true,
16751703
enableCheckpoints: enableCheckpoints ?? true,
16761704
shouldShowAnnouncement:
@@ -1863,6 +1891,12 @@ export class ClineProvider
18631891
soundEnabled: stateValues.soundEnabled ?? false,
18641892
ttsEnabled: stateValues.ttsEnabled ?? false,
18651893
ttsSpeed: stateValues.ttsSpeed ?? 1.0,
1894+
ttsProvider: stateValues.ttsProvider ?? "native",
1895+
ttsVoice: stateValues.ttsVoice ?? undefined,
1896+
googleCloudTtsApiKey: stateValues.googleCloudTtsApiKey ?? undefined,
1897+
googleCloudTtsProjectId: stateValues.googleCloudTtsProjectId ?? undefined,
1898+
azureTtsSubscriptionKey: stateValues.azureTtsSubscriptionKey ?? undefined,
1899+
azureTtsRegion: stateValues.azureTtsRegion ?? undefined,
18661900
diffEnabled: stateValues.diffEnabled ?? true,
18671901
enableCheckpoints: stateValues.enableCheckpoints ?? true,
18681902
soundVolume: stateValues.soundVolume,

src/core/webview/webviewMessageHandler.ts

Lines changed: 90 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ import { getTheme } from "../../integrations/theme/getTheme"
3636
import { discoverChromeHostUrl, tryChromeHostUrl } from "../../services/browser/browserDiscovery"
3737
import { searchWorkspaceFiles } from "../../services/search/file-search"
3838
import { fileExistsAtPath } from "../../utils/fs"
39-
import { playTts, setTtsEnabled, setTtsSpeed, stopTts } from "../../utils/tts"
39+
import { playTts, setTtsEnabled, setTtsSpeed, stopTts, setTtsProvider, initializeTts } from "../../utils/tts"
4040
import { searchCommits } from "../../utils/git"
4141
import { exportSettings, importSettingsWithFeedback } from "../config/importExport"
4242
import { getOpenAiModels } from "../../api/providers/openai"
@@ -330,11 +330,11 @@ export const webviewMessageHandler = async (
330330
await provider.postStateToWebview()
331331
break
332332
case "allowedMaxRequests":
333-
await updateGlobalState("allowedMaxRequests", message.value)
333+
await updateGlobalState("allowedMaxRequests", Number(message.value))
334334
await provider.postStateToWebview()
335335
break
336336
case "allowedMaxCost":
337-
await updateGlobalState("allowedMaxCost", message.value)
337+
await updateGlobalState("allowedMaxCost", Number(message.value))
338338
await provider.postStateToWebview()
339339
break
340340
case "alwaysAllowSubtasks":
@@ -353,7 +353,7 @@ export const webviewMessageHandler = async (
353353
await provider.postStateToWebview()
354354
break
355355
case "autoCondenseContextPercent":
356-
await updateGlobalState("autoCondenseContextPercent", message.value)
356+
await updateGlobalState("autoCondenseContextPercent", Number(message.value))
357357
await provider.postStateToWebview()
358358
break
359359
case "terminalOperation":
@@ -936,21 +936,88 @@ export const webviewMessageHandler = async (
936936
break
937937
case "soundVolume":
938938
const soundVolume = message.value ?? 0.5
939-
await updateGlobalState("soundVolume", soundVolume)
939+
await updateGlobalState("soundVolume", Number(soundVolume))
940940
await provider.postStateToWebview()
941941
break
942942
case "ttsEnabled":
943943
const ttsEnabled = message.bool ?? true
944944
await updateGlobalState("ttsEnabled", ttsEnabled)
945-
setTtsEnabled(ttsEnabled) // Add this line to update the tts utility
945+
setTtsEnabled(ttsEnabled)
946946
await provider.postStateToWebview()
947947
break
948948
case "ttsSpeed":
949-
const ttsSpeed = message.value ?? 1.0
949+
const ttsSpeed = Number(message.value ?? 1.0)
950950
await updateGlobalState("ttsSpeed", ttsSpeed)
951951
setTtsSpeed(ttsSpeed)
952952
await provider.postStateToWebview()
953953
break
954+
case "ttsProvider":
955+
const ttsProvider = String(message.value) as "native" | "google-cloud" | "azure"
956+
await updateGlobalState("ttsProvider", ttsProvider)
957+
await setTtsProvider(ttsProvider)
958+
await provider.postStateToWebview()
959+
break
960+
case "ttsVoice":
961+
const ttsVoice = String(message.value)
962+
await updateGlobalState("ttsVoice", ttsVoice)
963+
await provider.postStateToWebview()
964+
break
965+
case "googleCloudTtsApiKey":
966+
const googleCloudApiKey = String(message.value)
967+
await updateGlobalState("googleCloudTtsApiKey", googleCloudApiKey)
968+
// Re-initialize TTS with new config
969+
const gcState = await provider.getState()
970+
await initializeTts({
971+
provider: gcState.ttsProvider as "native" | "google-cloud" | "azure" | undefined,
972+
googleCloudApiKey: googleCloudApiKey,
973+
googleCloudProjectId: gcState.googleCloudTtsProjectId,
974+
azureSubscriptionKey: gcState.azureTtsSubscriptionKey,
975+
azureRegion: gcState.azureTtsRegion,
976+
})
977+
await provider.postStateToWebview()
978+
break
979+
case "googleCloudTtsProjectId":
980+
const googleCloudProjectId = String(message.value)
981+
await updateGlobalState("googleCloudTtsProjectId", googleCloudProjectId)
982+
// Re-initialize TTS with new config
983+
const gcpState = await provider.getState()
984+
await initializeTts({
985+
provider: gcpState.ttsProvider as "native" | "google-cloud" | "azure" | undefined,
986+
googleCloudApiKey: gcpState.googleCloudTtsApiKey,
987+
googleCloudProjectId: googleCloudProjectId,
988+
azureSubscriptionKey: gcpState.azureTtsSubscriptionKey,
989+
azureRegion: gcpState.azureTtsRegion,
990+
})
991+
await provider.postStateToWebview()
992+
break
993+
case "azureTtsSubscriptionKey":
994+
const azureSubscriptionKey = String(message.value)
995+
await updateGlobalState("azureTtsSubscriptionKey", azureSubscriptionKey)
996+
// Re-initialize TTS with new config
997+
const azState = await provider.getState()
998+
await initializeTts({
999+
provider: azState.ttsProvider as "native" | "google-cloud" | "azure" | undefined,
1000+
googleCloudApiKey: azState.googleCloudTtsApiKey,
1001+
googleCloudProjectId: azState.googleCloudTtsProjectId,
1002+
azureSubscriptionKey: azureSubscriptionKey,
1003+
azureRegion: azState.azureTtsRegion,
1004+
})
1005+
await provider.postStateToWebview()
1006+
break
1007+
case "azureTtsRegion":
1008+
const azureRegion = String(message.value)
1009+
await updateGlobalState("azureTtsRegion", azureRegion)
1010+
// Re-initialize TTS with new config
1011+
const azrState = await provider.getState()
1012+
await initializeTts({
1013+
provider: azrState.ttsProvider as "native" | "google-cloud" | "azure" | undefined,
1014+
googleCloudApiKey: azrState.googleCloudTtsApiKey,
1015+
googleCloudProjectId: azrState.googleCloudTtsProjectId,
1016+
azureSubscriptionKey: azrState.azureTtsSubscriptionKey,
1017+
azureRegion: azureRegion,
1018+
})
1019+
await provider.postStateToWebview()
1020+
break
9541021
case "playTts":
9551022
if (message.text) {
9561023
playTts(message.text, {
@@ -1028,7 +1095,7 @@ export const webviewMessageHandler = async (
10281095
}
10291096
break
10301097
case "fuzzyMatchThreshold":
1031-
await updateGlobalState("fuzzyMatchThreshold", message.value)
1098+
await updateGlobalState("fuzzyMatchThreshold", Number(message.value))
10321099
await provider.postStateToWebview()
10331100
break
10341101
case "updateVSCodeSetting": {
@@ -1072,11 +1139,11 @@ export const webviewMessageHandler = async (
10721139
await provider.postStateToWebview()
10731140
break
10741141
case "requestDelaySeconds":
1075-
await updateGlobalState("requestDelaySeconds", message.value ?? 5)
1142+
await updateGlobalState("requestDelaySeconds", Number(message.value ?? 5))
10761143
await provider.postStateToWebview()
10771144
break
10781145
case "writeDelayMs":
1079-
await updateGlobalState("writeDelayMs", message.value)
1146+
await updateGlobalState("writeDelayMs", Number(message.value))
10801147
await provider.postStateToWebview()
10811148
break
10821149
case "diagnosticsEnabled":
@@ -1109,10 +1176,10 @@ export const webviewMessageHandler = async (
11091176
}
11101177
break
11111178
case "terminalShellIntegrationTimeout":
1112-
await updateGlobalState("terminalShellIntegrationTimeout", message.value)
1179+
await updateGlobalState("terminalShellIntegrationTimeout", Number(message.value))
11131180
await provider.postStateToWebview()
11141181
if (message.value !== undefined) {
1115-
Terminal.setShellIntegrationTimeout(message.value)
1182+
Terminal.setShellIntegrationTimeout(Number(message.value))
11161183
}
11171184
break
11181185
case "terminalShellIntegrationDisabled":
@@ -1123,10 +1190,10 @@ export const webviewMessageHandler = async (
11231190
}
11241191
break
11251192
case "terminalCommandDelay":
1126-
await updateGlobalState("terminalCommandDelay", message.value)
1193+
await updateGlobalState("terminalCommandDelay", Number(message.value))
11271194
await provider.postStateToWebview()
11281195
if (message.value !== undefined) {
1129-
Terminal.setCommandDelay(message.value)
1196+
Terminal.setCommandDelay(Number(message.value))
11301197
}
11311198
break
11321199
case "terminalPowershellCounter":
@@ -1242,16 +1309,16 @@ export const webviewMessageHandler = async (
12421309
break
12431310
}
12441311
case "screenshotQuality":
1245-
await updateGlobalState("screenshotQuality", message.value)
1312+
await updateGlobalState("screenshotQuality", Number(message.value))
12461313
await provider.postStateToWebview()
12471314
break
12481315
case "maxOpenTabsContext":
1249-
const tabCount = Math.min(Math.max(0, message.value ?? 20), 500)
1316+
const tabCount = Math.min(Math.max(0, Number(message.value ?? 20)), 500)
12501317
await updateGlobalState("maxOpenTabsContext", tabCount)
12511318
await provider.postStateToWebview()
12521319
break
12531320
case "maxWorkspaceFiles":
1254-
const fileCount = Math.min(Math.max(0, message.value ?? 200), 500)
1321+
const fileCount = Math.min(Math.max(0, Number(message.value ?? 200)), 500)
12551322
await updateGlobalState("maxWorkspaceFiles", fileCount)
12561323
await provider.postStateToWebview()
12571324
break
@@ -1260,7 +1327,7 @@ export const webviewMessageHandler = async (
12601327
await provider.postStateToWebview()
12611328
break
12621329
case "followupAutoApproveTimeoutMs":
1263-
await updateGlobalState("followupAutoApproveTimeoutMs", message.value)
1330+
await updateGlobalState("followupAutoApproveTimeoutMs", Number(message.value))
12641331
await provider.postStateToWebview()
12651332
break
12661333
case "browserToolEnabled":
@@ -1281,20 +1348,20 @@ export const webviewMessageHandler = async (
12811348
await provider.postStateToWebview()
12821349
break
12831350
case "maxReadFileLine":
1284-
await updateGlobalState("maxReadFileLine", message.value)
1351+
await updateGlobalState("maxReadFileLine", Number(message.value))
12851352
await provider.postStateToWebview()
12861353
break
12871354
case "maxImageFileSize":
1288-
await updateGlobalState("maxImageFileSize", message.value)
1355+
await updateGlobalState("maxImageFileSize", Number(message.value))
12891356
await provider.postStateToWebview()
12901357
break
12911358
case "maxTotalImageSize":
1292-
await updateGlobalState("maxTotalImageSize", message.value)
1359+
await updateGlobalState("maxTotalImageSize", Number(message.value))
12931360
await provider.postStateToWebview()
12941361
break
12951362
case "maxConcurrentFileReads":
12961363
const valueToSave = message.value // Capture the value intended for saving
1297-
await updateGlobalState("maxConcurrentFileReads", valueToSave)
1364+
await updateGlobalState("maxConcurrentFileReads", Number(valueToSave))
12981365
await provider.postStateToWebview()
12991366
break
13001367
case "includeDiagnosticMessages":
@@ -1304,7 +1371,7 @@ export const webviewMessageHandler = async (
13041371
await provider.postStateToWebview()
13051372
break
13061373
case "maxDiagnosticMessages":
1307-
await updateGlobalState("maxDiagnosticMessages", message.value ?? 50)
1374+
await updateGlobalState("maxDiagnosticMessages", Number(message.value ?? 50))
13081375
await provider.postStateToWebview()
13091376
break
13101377
case "setHistoryPreviewCollapsed": // Add the new case handler

src/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,7 @@
415415
"@anthropic-ai/vertex-sdk": "^0.7.0",
416416
"@aws-sdk/client-bedrock-runtime": "^3.848.0",
417417
"@aws-sdk/credential-providers": "^3.848.0",
418+
"@google-cloud/text-to-speech": "^6.2.0",
418419
"@google/genai": "^1.0.0",
419420
"@lmstudio/sdk": "^1.1.1",
420421
"@mistralai/mistralai": "^1.3.6",
@@ -448,6 +449,7 @@
448449
"isbinaryfile": "^5.0.2",
449450
"lodash.debounce": "^4.0.8",
450451
"mammoth": "^1.9.1",
452+
"microsoft-cognitiveservices-speech-sdk": "^1.45.0",
451453
"monaco-vscode-textmate-theme-converter": "^0.1.7",
452454
"node-cache": "^5.1.2",
453455
"node-ipc": "^12.0.0",

0 commit comments

Comments
 (0)