-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Add text-to-speech functionality #1412
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
0d4a743
4cd5545
8d98ce6
1a47e9d
88cf106
8f19387
a734d51
be9e57e
409d67c
da8a98c
0b716f2
2223762
1b6b830
b4eed3f
552022d
5f32cb9
1d7de4b
63d6d64
d867e0e
730548e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| import * as vscode from "vscode" | ||
|
|
||
| let isTtsEnabled = false | ||
| let isSpeaking = false | ||
| const utteranceQueue: string[] = [] | ||
|
|
||
| /** | ||
| * Enable or disable text-to-speech by updating the module-level `isTtsEnabled` flag. | ||
| * Only the flag is changed here; the utterance queue itself is left untouched. | ||
| * @param enabled true to allow speech, false to suppress it | ||
| */ | ||
| export const setTtsEnabled = (enabled: boolean): void => { | ||
| isTtsEnabled = enabled | ||
| } | ||
|
|
||
| /** | ||
| * Speak the next queued utterance, then keep draining the queue. | ||
| * Returns immediately when TTS is disabled, an utterance is already being spoken, | ||
| * or the queue is empty. `isSpeaking` acts as a guard so that only one utterance | ||
| * is handed to `say.speak` at a time. Failures are surfaced through | ||
| * `vscode.window.showErrorMessage` and do not stop the remaining queue. | ||
| */ | ||
| const processQueue = async (): Promise<void> => { | ||
| if (!isTtsEnabled || isSpeaking || utteranceQueue.length === 0) { | ||
| return | ||
| } | ||
|
|
||
| try { | ||
| isSpeaking = true | ||
| const nextUtterance = utteranceQueue.shift()! | ||
| const say = require("say") | ||
|
|
||
| // say.speak reports completion through a callback; wrap it in a promise so it can be awaited | ||
| await new Promise<void>((resolve, reject) => { | ||
| say.speak(nextUtterance, null, null, (err: Error) => { | ||
| if (err) { | ||
| reject(err) | ||
| } else { | ||
| resolve() | ||
| } | ||
| }) | ||
| }) | ||
|
|
||
| isSpeaking = false | ||
| // isSpeaking was cleared above so this recursive call can pick up the next queued item, if any | ||
| await processQueue() | ||
| } catch (error: any) { | ||
| isSpeaking = false | ||
| vscode.window.showErrorMessage(error.message) | ||
| // Best effort: the failed utterance was already removed from the queue; move on to the next one | ||
| await processQueue() | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Queue a message for text-to-speech and start processing the queue. | ||
| * Does nothing when TTS is disabled. Errors thrown while queueing or processing | ||
| * are reported with `vscode.window.showErrorMessage` rather than rethrown. | ||
| * @param message text to be spoken | ||
| * @returns a promise that resolves after the awaited `processQueue` call settles, | ||
| * or immediately when TTS is disabled | ||
| */ | ||
| export const playTts = async (message: string): Promise<void> => { | ||
| if (!isTtsEnabled) { | ||
| return | ||
| } | ||
|
|
||
| try { | ||
| utteranceQueue.push(message) | ||
| await processQueue() | ||
| } catch (error: any) { | ||
| vscode.window.showErrorMessage(error.message) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -86,6 +86,7 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie | |
| const disableAutoScrollRef = useRef(false) | ||
| const [showScrollToBottom, setShowScrollToBottom] = useState(false) | ||
| const [isAtBottom, setIsAtBottom] = useState(false) | ||
| const lastTtsRef = useRef<string>("") | ||
|
|
||
| const [wasStreaming, setWasStreaming] = useState<boolean>(false) | ||
| const [showCheckpointWarning, setShowCheckpointWarning] = useState<boolean>(false) | ||
|
|
@@ -99,6 +100,10 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie | |
| vscode.postMessage({ type: "playSound", audioType }) | ||
| } | ||
|
|
||
| function playTts(text: string) { | ||
| vscode.postMessage({ type: "playTts", text }) | ||
| } | ||
|
|
||
| useDeepCompareEffect(() => { | ||
| // if last message is an ask, show user ask UI | ||
| // if user finished a task, then start a new task with a new conversation history since in this moment that the extension is waiting for user response, the user could close the extension and the conversation history would be lost. | ||
|
|
@@ -659,6 +664,25 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie | |
| ) | ||
|
|
||
| useEffect(() => { | ||
| // skip input message | ||
| if (lastMessage && messages.length > 1) { | ||
| let text = lastMessage?.text || "" | ||
|
|
||
| if ( | ||
| lastMessage.type === "say" && // is a say message | ||
| !lastMessage.partial && // not a partial message | ||
| !text.startsWith("{") && // not a json object | ||
| text !== lastTtsRef.current // not the same as last TTS message | ||
| ) { | ||
| try { | ||
| playTts(text) | ||
| lastTtsRef.current = text | ||
| } catch (error) { | ||
| console.error("Failed to execute text-to-speech:", error) | ||
| } | ||
| } | ||
| } | ||
|
||
|
|
||
| // Only execute when isStreaming changes from true to false | ||
| if (wasStreaming && !isStreaming && lastMessage) { | ||
| // Play appropriate sound based on lastMessage content | ||
|
|
@@ -691,7 +715,7 @@ const ChatView = ({ isHidden, showAnnouncement, hideAnnouncement, showHistoryVie | |
| } | ||
| // Update previous value | ||
| setWasStreaming(isStreaming) | ||
| }, [isStreaming, lastMessage, wasStreaming, isAutoApproved]) | ||
| }, [isStreaming, lastMessage, wasStreaming, isAutoApproved, messages.length]) | ||
|
|
||
| const isBrowserSessionMessage = (message: ClineMessage): boolean => { | ||
| // which of visible messages are browser session messages, see above | ||
|
|
||

Uh oh!
There was an error while loading. Please reload this page.