apps/sim/app/chat/components/voice-interface/voice-interface.tsx (38 additions, 1 deletion)
@@ -80,6 +80,10 @@ export function VoiceInterface({
   const currentStateRef = useRef<'idle' | 'listening' | 'agent_speaking'>('idle')
   const isCallEndedRef = useRef(false)
 
+  useEffect(() => {
+    isCallEndedRef.current = false
+  }, [])
+
   useEffect(() => {
     currentStateRef.current = state
   }, [state])
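
The reset of `isCallEndedRef` on mount and the mirroring of `state` into `currentStateRef` exist for the same reason: handlers registered once on a `SpeechRecognition` instance capture stale closures, so fresh values have to be read through refs. A minimal sketch of that mirroring pattern (the hook name is hypothetical, not part of this PR):

```ts
import { useEffect, useRef } from 'react'

// Sketch: keep the latest value of some state in a ref so that long-lived
// event handlers (registered once) can read it without being re-created.
function useLatestRef<T>(value: T) {
  const ref = useRef(value)
  useEffect(() => {
    ref.current = value // updated after every render that changes `value`
  }, [value])
  return ref
}
```

Reading `currentStateRef.current` inside `recognition.onresult` then reflects the state at event time rather than at handler-creation time.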
@@ -119,6 +123,8 @@ export function VoiceInterface({
   }, [])
 
   useEffect(() => {
+    if (isCallEndedRef.current) return
+
     if (isPlayingAudio && state !== 'agent_speaking') {
       clearResponseTimeout()
       setState('agent_speaking')
@@ -139,6 +145,9 @@
         }
       }
     } else if (!isPlayingAudio && state === 'agent_speaking') {
+      // Don't unmute/restart if call has ended
+      if (isCallEndedRef.current) return
+
       setState('idle')
       setCurrentTranscript('')
 
@@ -226,6 +235,8 @@
     recognition.onstart = () => {}
 
     recognition.onresult = (event: SpeechRecognitionEvent) => {
+      if (isCallEndedRef.current) return
+
       const currentState = currentStateRef.current
 
       if (isMutedRef.current || currentState !== 'listening') {
@@ -303,6 +314,8 @@
   }, [isSupported, onVoiceTranscript, setResponseTimeout])
 
   const startListening = useCallback(() => {
+    if (isCallEndedRef.current) return
+
     if (!isInitialized || isMuted || state !== 'idle') {
       return
     }
@@ -320,6 +333,9 @@
   }, [isInitialized, isMuted, state])
 
   const stopListening = useCallback(() => {
+    // Don't process if call has ended
+    if (isCallEndedRef.current) return
+
     setState('idle')
     setCurrentTranscript('')
 
@@ -333,12 +349,15 @@
   }, [])
 
   const handleInterrupt = useCallback(() => {
+    if (isCallEndedRef.current) return
+
     if (state === 'agent_speaking') {
       onInterrupt?.()
       setState('listening')
       setCurrentTranscript('')
 
       setIsMuted(false)
+      isMutedRef.current = false
       if (mediaStreamRef.current) {
         mediaStreamRef.current.getAudioTracks().forEach((track) => {
           track.enabled = true
@@ -356,11 +375,22 @@
   }, [state, onInterrupt])
 
   const handleCallEnd = useCallback(() => {
+    // Mark call as ended FIRST to prevent any effects from restarting recognition
+    isCallEndedRef.current = true
+
+    // Set muted to true to prevent auto-start effect from triggering
+    setIsMuted(true)
+    isMutedRef.current = true
+
     setState('idle')
     setCurrentTranscript('')
-    setIsMuted(false)
+
+    // Immediately disable audio tracks to stop listening
+    if (mediaStreamRef.current) {
+      mediaStreamRef.current.getAudioTracks().forEach((track) => {
+        track.enabled = false
+      })
+    }
 
     if (recognitionRef.current) {
       try {
@@ -377,6 +407,8 @@
 
   useEffect(() => {
     const handleKeyDown = (event: KeyboardEvent) => {
+      if (isCallEndedRef.current) return
+
       if (event.code === 'Space') {
         event.preventDefault()
         handleInterrupt()
@@ -388,13 +420,16 @@
   }, [handleInterrupt])
 
   const toggleMute = useCallback(() => {
+    if (isCallEndedRef.current) return
+
     if (state === 'agent_speaking') {
       handleInterrupt()
       return
     }
 
     const newMutedState = !isMuted
     setIsMuted(newMutedState)
+    isMutedRef.current = newMutedState
 
     if (mediaStreamRef.current) {
       mediaStreamRef.current.getAudioTracks().forEach((track) => {
@@ -417,6 +452,8 @@
   }, [isSupported, setupSpeechRecognition, setupAudio])
 
   useEffect(() => {
+    if (isCallEndedRef.current) return
+
     if (isInitialized && !isMuted && state === 'idle') {
       startListening()
     }
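
Every guard added in this file reads the same ref, so the fix ultimately rests on the ordering inside `handleCallEnd`: flip the flag before doing anything that could fire a callback. A condensed sketch of that teardown order, assuming a ref-based flag like `isCallEndedRef` (function and parameter names hypothetical):

```ts
// Sketch of the teardown ordering used above: flag first, then mute,
// then silence the tracks, then stop recognition.
function endCall(
  endedRef: { current: boolean },
  stream: MediaStream | null,
  recognition: { stop(): void } | null
) {
  endedRef.current = true // 1. synchronously visible to every callback/effect
  stream?.getAudioTracks().forEach((track) => {
    track.enabled = false // 2. stop capturing audio immediately
  })
  try {
    recognition?.stop() // 3. may still fire onend/onresult; the flag keeps them inert
  } catch {
    // recognition may already be stopped; safe to ignore
  }
}
```

Because the flag lives in a ref rather than state, step 1 takes effect synchronously, before React re-renders or re-runs any effect.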
apps/sim/executor/handlers/agent/agent-handler.ts (79 additions, 18 deletions)
@@ -987,18 +987,19 @@ export class AgentBlockHandler implements BlockHandler {
       try {
         const executionData = JSON.parse(executionDataHeader)
 
-        // If execution data contains full content, persist to memory
-        if (ctx && inputs && executionData.output?.content) {
-          const assistantMessage: Message = {
-            role: 'assistant',
-            content: executionData.output.content,
-          }
-          // Fire and forget - don't await
-          memoryService
-            .persistMemoryMessage(ctx, inputs, assistantMessage, block.id)
-            .catch((error) =>
-              logger.error('Failed to persist streaming response to memory:', error)
-            )
+        // If execution data contains content or tool calls, persist to memory
+        if (ctx && inputs && (executionData.output?.content || executionData.output?.toolCalls?.list?.length)) {
+          const toolCalls = executionData.output?.toolCalls?.list
+          const messages = this.buildMessagesForMemory(executionData.output.content, toolCalls)
+
+          // Fire and forget - don't await, persist all messages
+          Promise.all(
+            messages.map((message) =>
+              memoryService.persistMemoryMessage(ctx, inputs, message, block.id)
+            )
+          ).catch((error) =>
+            logger.error('Failed to persist streaming response to memory:', error)
+          )
         }
 
         return {
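
Since the streaming path can now persist several messages per response, the single fire-and-forget call becomes a `Promise.all` with one shared `.catch`. All persists start eagerly from `.map(...)`, the handler is never awaited (so streaming latency is unaffected), and `Promise.all` rejects on the first failure, so only the first error reaches the log while the remaining calls still run to completion. A small sketch of that behavior (the `persist` parameter is a stand-in, not an API from this repo):

```ts
// Sketch: fire-and-forget persistence of several messages. Every persist
// starts immediately; only the first rejection reaches the catch handler.
function persistAll(messages: unknown[], persist: (m: unknown) => Promise<void>): void {
  Promise.all(messages.map((m) => persist(m))).catch((error) => {
    console.error('Failed to persist streaming response to memory:', error)
  })
}
```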
@@ -1117,32 +1118,92 @@
         return
       }
 
-      // Extract content from regular response
+      // Extract content and tool calls from regular response
       const blockOutput = result as any
       const content = blockOutput?.content
+      const toolCalls = blockOutput?.toolCalls?.list
 
-      if (!content || typeof content !== 'string') {
+      // Build messages to persist
+      const messages = this.buildMessagesForMemory(content, toolCalls)
+
+      if (messages.length === 0) {
        return
       }
 
-      const assistantMessage: Message = {
-        role: 'assistant',
-        content,
+      // Persist all messages
+      for (const message of messages) {
+        await memoryService.persistMemoryMessage(ctx, inputs, message, blockId)
       }
 
-      await memoryService.persistMemoryMessage(ctx, inputs, assistantMessage, blockId)
-
       logger.debug('Persisted assistant response to memory', {
         workflowId: ctx.workflowId,
         memoryType: inputs.memoryType,
         conversationId: inputs.conversationId,
+        messageCount: messages.length,
       })
     } catch (error) {
       logger.error('Failed to persist response to memory:', error)
       // Don't throw - memory persistence failure shouldn't break workflow execution
     }
   }
+
+  /**
+   * Builds messages for memory storage including tool calls and results
+   * Returns proper OpenAI-compatible message format:
+   * - Assistant message with tool_calls array (if tools were used)
+   * - Tool role messages with results (one per tool call)
+   * - Final assistant message with content (if present)
+   */
+  private buildMessagesForMemory(content: string | undefined, toolCalls: any[] | undefined): Message[] {
+    const messages: Message[] = []
+
+    if (toolCalls?.length) {
+      // Generate stable IDs for each tool call (only if not provided by provider)
+      // Use index to ensure uniqueness even for same tool name in same millisecond
+      const toolCallsWithIds = toolCalls.map((tc: any, index: number) => ({
+        ...tc,
+        _stableId: tc.id || `call_${tc.name}_${Date.now()}_${index}_${Math.random().toString(36).slice(2, 7)}`,
+      }))
+
+      // Add assistant message with tool_calls
+      const formattedToolCalls = toolCallsWithIds.map((tc: any) => ({
+        id: tc._stableId,
+        type: 'function' as const,
+        function: {
+          name: tc.name,
+          arguments: tc.rawArguments || JSON.stringify(tc.arguments || {}),
+        },
+      }))
+
+      messages.push({
+        role: 'assistant',
+        content: null,
+        tool_calls: formattedToolCalls,
+      })
+
+      // Add tool result messages using the same stable IDs
+      for (const tc of toolCallsWithIds) {
+        const resultContent = typeof tc.result === 'string' ? tc.result : JSON.stringify(tc.result || {})
+        messages.push({
+          role: 'tool',
+          content: resultContent,
+          tool_call_id: tc._stableId,
+          name: tc.name, // Store tool name for providers that need it (e.g., Google/Gemini)
+        })
+      }
+    }
+
+    // Add final assistant response if present
+    if (content && typeof content === 'string') {
+      messages.push({
+        role: 'assistant',
+        content,
+      })
+    }
+
+    return messages
+  }
 
   private processProviderResponse(
     response: any,
     block: SerializedBlock,
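
For a turn where the model called one tool and then answered, `buildMessagesForMemory` above yields the standard three-message, OpenAI-compatible transcript. A hypothetical example of the resulting shape (tool name, arguments, and ID invented for illustration):

```ts
// Hypothetical result for one completed weather tool call plus a final answer.
// The tool message reuses the assistant message's call ID, which is what lets
// OpenAI-compatible providers pair each result with its originating call.
const persisted = [
  {
    role: 'assistant',
    content: null,
    tool_calls: [
      {
        id: 'call_get_weather_1730000000000_0_ab3xq',
        type: 'function',
        function: { name: 'get_weather', arguments: '{"city":"Oslo"}' },
      },
    ],
  },
  {
    role: 'tool',
    content: '{"tempC":18}',
    tool_call_id: 'call_get_weather_1730000000000_0_ab3xq',
    name: 'get_weather',
  },
  { role: 'assistant', content: "It's 18 °C in Oslo." },
]
```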