Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
0d4a743
Add text-to-speech functionality
heyseth Mar 6, 2025
4cd5545
Merge branch 'RooVetGit:main' into feature/textToSpeech
heyseth Mar 6, 2025
8d98ce6
Merge branch 'RooVetGit:main' into feature/textToSpeech
heyseth Mar 7, 2025
1a47e9d
Merge branch 'RooVetGit:main' into feature/textToSpeech
heyseth Mar 8, 2025
88cf106
Add speed config option to text-to-speech
heyseth Mar 8, 2025
8f19387
Fix test case for tts speed slider
heyseth Mar 8, 2025
a734d51
Fix test case for tts speed slider (really)
heyseth Mar 8, 2025
be9e57e
Disabled error message logging in tts.ts
heyseth Mar 9, 2025
409d67c
Merge branch 'RooVetGit:main' into feature/textToSpeech
heyseth Mar 9, 2025
da8a98c
Merge branch 'RooVetGit:main' into feature/textToSpeech
heyseth Mar 10, 2025
0b716f2
Merge branch 'RooVetGit:main' into feature/textToSpeech
heyseth Mar 11, 2025
2223762
Merge branch 'RooVetGit:main' into feature/textToSpeech
heyseth Mar 16, 2025
1b6b830
ignore markdown and mermaid diagrams in TTS
heyseth Mar 17, 2025
b4eed3f
Merge branch 'feature/textToSpeech' of https://github.com/heyseth/Roo…
heyseth Mar 17, 2025
552022d
add ttsEnabled and ttsSpeed to GlobalStateKey
heyseth Mar 17, 2025
5f32cb9
Merge remote-tracking branch 'upstream/main' into feature/textToSpeech
heyseth Mar 17, 2025
1d7de4b
fix failing webview test for save button
heyseth Mar 17, 2025
63d6d64
Merge remote-tracking branch 'origin/main' into feature/textToSpeech
mrubens Mar 18, 2025
d867e0e
Translations
mrubens Mar 18, 2025
730548e
Fix tests
mrubens Mar 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@
"puppeteer-chromium-resolver": "^23.0.0",
"puppeteer-core": "^23.4.0",
"reconnecting-eventsource": "^1.6.4",
"say": "^0.16.0",
"serialize-error": "^11.0.3",
"simple-git": "^3.27.0",
"sound-play": "^1.1.0",
Expand Down
29 changes: 29 additions & 0 deletions src/core/webview/ClineProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import { BrowserSession } from "../../services/browser/BrowserSession"
import { discoverChromeInstances } from "../../services/browser/browserDiscovery"
import { fileExistsAtPath } from "../../utils/fs"
import { playSound, setSoundEnabled, setSoundVolume } from "../../utils/sound"
import { playTts, setTtsEnabled, setTtsSpeed } from "../../utils/tts"
import { singleCompletionHandler } from "../../utils/single-completion-handler"
import { searchCommits } from "../../utils/git"
import { getDiffStrategy } from "../diff/DiffStrategy"
Expand Down Expand Up @@ -356,6 +357,11 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
setSoundEnabled(soundEnabled ?? false)
})

// Initialize tts enabled state
this.getState().then(({ ttsEnabled }) => {
setTtsEnabled(ttsEnabled ?? false)
})

webviewView.webview.options = {
// Allow scripts in the webview
enableScripts: true,
Expand Down Expand Up @@ -1233,6 +1239,23 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
setSoundVolume(soundVolume)
await this.postStateToWebview()
break
case "ttsEnabled":
const ttsEnabled = message.bool ?? true
await this.updateGlobalState("ttsEnabled", ttsEnabled)
setTtsEnabled(ttsEnabled) // Add this line to update the tts utility
await this.postStateToWebview()
break
case "ttsSpeed":
const ttsSpeed = message.value ?? 1.0
await this.updateGlobalState("ttsSpeed", ttsSpeed)
setTtsSpeed(ttsSpeed)
await this.postStateToWebview()
break
case "playTts":
if (message.text) {
playTts(message.text)
}
break
case "diffEnabled":
const diffEnabled = message.bool ?? true
await this.updateGlobalState("diffEnabled", diffEnabled)
Expand Down Expand Up @@ -2333,6 +2356,8 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
alwaysAllowModeSwitch,
alwaysAllowSubtasks,
soundEnabled,
ttsEnabled,
ttsSpeed,
diffEnabled,
enableCheckpoints,
checkpointStorage,
Expand Down Expand Up @@ -2392,6 +2417,8 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
.filter((item: HistoryItem) => item.ts && item.task)
.sort((a: HistoryItem, b: HistoryItem) => b.ts - a.ts),
soundEnabled: soundEnabled ?? false,
ttsEnabled: ttsEnabled ?? false,
ttsSpeed: ttsSpeed ?? 1.0,
diffEnabled: diffEnabled ?? true,
enableCheckpoints: enableCheckpoints ?? true,
checkpointStorage: checkpointStorage ?? "task",
Expand Down Expand Up @@ -2551,6 +2578,8 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
taskHistory: stateValues.taskHistory,
allowedCommands: stateValues.allowedCommands,
soundEnabled: stateValues.soundEnabled ?? false,
ttsEnabled: stateValues.ttsEnabled ?? false,
ttsSpeed: stateValues.ttsSpeed ?? 1.0,
diffEnabled: stateValues.diffEnabled ?? true,
enableCheckpoints: stateValues.enableCheckpoints ?? true,
checkpointStorage: stateValues.checkpointStorage ?? "task",
Expand Down
20 changes: 20 additions & 0 deletions src/core/webview/__tests__/ClineProvider.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { ClineProvider } from "../ClineProvider"
import { ExtensionMessage, ExtensionState } from "../../../shared/ExtensionMessage"
import { GlobalStateKey, SecretKey } from "../../../shared/globalState"
import { setSoundEnabled } from "../../../utils/sound"
import { setTtsEnabled } from "../../../utils/tts"
import { defaultModeSlug } from "../../../shared/modes"
import { experimentDefault } from "../../../shared/experiments"
import { Cline } from "../../Cline"
Expand Down Expand Up @@ -271,6 +272,11 @@ jest.mock("../../../utils/sound", () => ({
setSoundEnabled: jest.fn(),
}))

// Mock tts utility
jest.mock("../../../utils/tts", () => ({
setTtsEnabled: jest.fn(),
}))

// Mock ESM modules
jest.mock("p-wait-for", () => ({
__esModule: true,
Expand Down Expand Up @@ -506,6 +512,7 @@ describe("ClineProvider", () => {
alwaysAllowMcp: false,
uriScheme: "vscode",
soundEnabled: false,
ttsEnabled: false,
diffEnabled: false,
enableCheckpoints: false,
checkpointStorage: "task",
Expand Down Expand Up @@ -603,6 +610,7 @@ describe("ClineProvider", () => {
expect(state).toHaveProperty("alwaysAllowBrowser")
expect(state).toHaveProperty("taskHistory")
expect(state).toHaveProperty("soundEnabled")
expect(state).toHaveProperty("ttsEnabled")
expect(state).toHaveProperty("diffEnabled")
expect(state).toHaveProperty("writeDelayMs")
})
Expand Down Expand Up @@ -666,6 +674,18 @@ describe("ClineProvider", () => {
expect(setSoundEnabled).toHaveBeenCalledWith(false)
expect(mockContext.globalState.update).toHaveBeenCalledWith("soundEnabled", false)
expect(mockPostMessage).toHaveBeenCalled()

// Simulate setting tts to enabled
await messageHandler({ type: "ttsEnabled", bool: true })
expect(setTtsEnabled).toHaveBeenCalledWith(true)
expect(mockContext.globalState.update).toHaveBeenCalledWith("ttsEnabled", true)
expect(mockPostMessage).toHaveBeenCalled()

// Simulate setting tts to disabled
await messageHandler({ type: "ttsEnabled", bool: false })
expect(setTtsEnabled).toHaveBeenCalledWith(false)
expect(mockContext.globalState.update).toHaveBeenCalledWith("ttsEnabled", false)
expect(mockPostMessage).toHaveBeenCalled()
})

test("requestDelaySeconds defaults to 10 seconds", async () => {
Expand Down
2 changes: 2 additions & 0 deletions src/exports/roo-code.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ export type GlobalStateKey =
| "openRouterUseMiddleOutTransform"
| "googleGeminiBaseUrl"
| "allowedCommands"
| "ttsEnabled"
| "ttsSpeed"
| "soundEnabled"
| "soundVolume"
| "diffEnabled"
Expand Down
2 changes: 2 additions & 0 deletions src/shared/ExtensionMessage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ export interface ExtensionState {
currentTaskItem?: HistoryItem
allowedCommands?: string[]
soundEnabled?: boolean
ttsEnabled?: boolean
ttsSpeed?: number
soundVolume?: number
diffEnabled?: boolean
enableCheckpoints: boolean
Expand Down
3 changes: 3 additions & 0 deletions src/shared/WebviewMessage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ export interface WebviewMessage {
| "alwaysAllowModeSwitch"
| "alwaysAllowSubtasks"
| "playSound"
| "playTts"
| "soundEnabled"
| "ttsEnabled"
| "ttsSpeed"
| "soundVolume"
| "diffEnabled"
| "enableCheckpoints"
Expand Down
2 changes: 2 additions & 0 deletions src/shared/globalState.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ export const GLOBAL_STATE_KEYS = [
"googleGeminiBaseUrl",
"allowedCommands",
"soundEnabled",
"ttsEnabled",
"ttsSpeed",
"soundVolume",
"diffEnabled",
"enableCheckpoints",
Expand Down
75 changes: 75 additions & 0 deletions src/utils/tts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import * as vscode from "vscode"

// Whether speech output is currently allowed; written by setTtsEnabled.
let isTtsEnabled = false
// Speed value handed to say.speak for every utterance; written by setTtsSpeed.
let speed = 1.0
// Set by processQueue while it is busy speaking, so overlapping calls return early.
let isSpeaking = false
// Messages waiting to be spoken; playTts appends, processQueue removes from the front.
const utteranceQueue: string[] = []

/**
 * Turn text-to-speech on or off.
 *
 * The flag is consulted by playTts and processQueue before anything is spoken.
 * @param enabled true to allow speech, false to suppress it
 */
export function setTtsEnabled(enabled: boolean): void {
	isTtsEnabled = enabled
}

/**
 * Change the speed used for subsequent utterances.
 * @param newSpeed value stored and later passed as the speed argument of say.speak
 */
export function setTtsSpeed(newSpeed: number): void {
	speed = newSpeed
}

/**
 * Drain the utterance queue, speaking one entry at a time.
 *
 * Returns immediately when a drain is already in progress (`isSpeaking`).
 * When TTS is disabled, anything still queued is discarded so that stale
 * messages are not spoken once TTS is switched back on. A failed utterance is
 * logged and skipped; the remaining entries are still spoken.
 *
 * @returns a promise that resolves once this call has nothing left to speak
 */
const processQueue = async (): Promise<void> => {
	if (isSpeaking) {
		return
	}

	isSpeaking = true
	try {
		// Re-check the flag before every utterance: it can be toggled while speech is awaited.
		while (isTtsEnabled) {
			const nextUtterance = utteranceQueue.shift()
			if (nextUtterance === undefined) {
				break
			}

			try {
				// Resolved lazily so the module is only loaded when something is actually spoken.
				const say = require("say")

				// say.speak reports completion through a callback; wrap it so it can be awaited.
				await new Promise<void>((resolve, reject) => {
					say.speak(nextUtterance, null, speed, (err: Error | null) => {
						if (err) {
							reject(err)
						} else {
							resolve()
						}
					})
				})
			} catch (error: unknown) {
				// Best effort: report the failure and carry on with the next queued utterance.
				console.error("Text-to-speech failed for an utterance:", error)
			}
		}

		if (!isTtsEnabled) {
			// Drop whatever was queued before TTS was turned off.
			utteranceQueue.length = 0
		}
	} finally {
		isSpeaking = false
	}
}

/**
 * Queue a message to be spoken and start draining the queue.
 *
 * Does nothing when TTS is disabled or when the message is blank, since a
 * whitespace-only message has nothing to speak. Note that processQueue returns
 * right away while another utterance is being spoken, so the returned promise
 * does not necessarily wait for this particular message to finish.
 *
 * @param message text to speak
 * @returns a promise that resolves when the processQueue call started here returns
 */
export const playTts = async (message: string): Promise<void> => {
	if (!isTtsEnabled || message.trim().length === 0) {
		return
	}

	try {
		utteranceQueue.push(message)
		await processQueue()
	} catch (error: unknown) {
		// Speech is best effort: report the failure instead of rejecting to the caller.
		console.error("Text-to-speech playback failed:", error)
	}
}
6 changes: 6 additions & 0 deletions webview-ui/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions webview-ui/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
"react-virtuoso": "^4.7.13",
"rehype-highlight": "^7.0.0",
"remark-gfm": "^4.0.1",
"remove-markdown": "^0.6.0",
"shell-quote": "^1.8.2",
"styled-components": "^6.1.13",
"tailwind-merge": "^2.6.0",
Expand Down
36 changes: 35 additions & 1 deletion webview-ui/src/components/chat/ChatView.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import { validateCommand } from "../../utils/command-validation"
import { getAllModes } from "../../../../src/shared/modes"
import TelemetryBanner from "../common/TelemetryBanner"
import { useAppTranslation } from "@/i18n/TranslationContext"
import removeMd from "remove-markdown"

interface ChatViewProps {
isHidden: boolean
Expand Down Expand Up @@ -91,6 +92,7 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
const disableAutoScrollRef = useRef(false)
const [showScrollToBottom, setShowScrollToBottom] = useState(false)
const [isAtBottom, setIsAtBottom] = useState(false)
const lastTtsRef = useRef<string>("")

const [wasStreaming, setWasStreaming] = useState<boolean>(false)
const [showCheckpointWarning, setShowCheckpointWarning] = useState<boolean>(false)
Expand All @@ -104,6 +106,10 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
vscode.postMessage({ type: "playSound", audioType })
}

// Forward `text` to the extension host in a "playTts" message.
function playTts(text: string) {
	const type = "playTts"
	vscode.postMessage({ type, text })
}

useDeepCompareEffect(() => {
// if last message is an ask, show user ask UI
// if user finished a task, then start a new task with a new conversation history since in this moment that the extension is waiting for user response, the user could close the extension and the conversation history would be lost.
Expand Down Expand Up @@ -674,6 +680,34 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
)

useEffect(() => {
// this ensures the first message is not read, future user messages are labelled as user_feedback
if (lastMessage && messages.length > 1) {
//console.log(JSON.stringify(lastMessage))
if (
lastMessage.text && // has text
(lastMessage.say === "text" || lastMessage.say === "completion_result") && // is a text message
!lastMessage.partial && // not a partial message
!lastMessage.text.startsWith("{") // not a json object
) {
let text = lastMessage?.text || ""
const mermaidRegex = /```mermaid[\s\S]*?```/g
// remove mermaid diagrams from text
text = text.replace(mermaidRegex, "")
// remove markdown from text
text = removeMd(text)

// ensure message is not a duplicate of last read message
if (text !== lastTtsRef.current) {
try {
playTts(text)
lastTtsRef.current = text
} catch (error) {
console.error("Failed to execute text-to-speech:", error)
}
}
}
}

// Only execute when isStreaming changes from true to false
if (wasStreaming && !isStreaming && lastMessage) {
// Play appropriate sound based on lastMessage content
Expand Down Expand Up @@ -706,7 +740,7 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie
}
// Update previous value
setWasStreaming(isStreaming)
}, [isStreaming, lastMessage, wasStreaming, isAutoApproved])
}, [isStreaming, lastMessage, wasStreaming, isAutoApproved, messages.length])

const isBrowserSessionMessage = (message: ClineMessage): boolean => {
// which of visible messages are browser session messages, see above
Expand Down
Loading
Loading