Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions utils/unified_ai_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from typing import Dict, List, Any, Optional, Union, Tuple
from enum import Enum
from functools import lru_cache
from concurrent.futures import ThreadPoolExecutor

# Set up logging
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -180,6 +181,30 @@ def speech_to_text(self, audio_data: bytes) -> str:
logger.error(f"STT error: {e}")
return "Speech recognition error"

def batch_speech_to_text(self, audio_data_list: List[bytes]) -> List[str]:
    """Transcribe several audio clips concurrently.

    Args:
        audio_data_list: Audio payloads to transcribe, one ``bytes``
            object per clip.

    Returns:
        Transcription strings, index-aligned with ``audio_data_list``.
    """
    count = len(audio_data_list)
    if count == 0:
        return []

    # Mirror speech_to_text's behaviour when no STT backend is configured.
    if 'huggingface' not in self.available_providers:
        return ["Speech recognition not available"] * count

    try:
        # speech_to_text already handles its own per-item failures, so
        # map() just fans the calls out across threads while preserving
        # the input order in the returned list.
        with ThreadPoolExecutor(max_workers=min(10, count)) as pool:
            return list(pool.map(self.speech_to_text, audio_data_list))
    except Exception as e:
        logger.error(f"Batch STT error: {e}")
        return ["Batch processing error"] * count

# === AI HELPER FUNCTIONS (from ai_helper.py) ===

def get_ai_response(self, prompt: str, conversation_history: Optional[List[Dict[str, str]]] = None) -> str:
Expand Down
23 changes: 18 additions & 5 deletions utils/voice_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import struct
import math
import time
from utils.unified_ai_service import get_unified_ai_service

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -479,19 +480,31 @@ def process_batch(self):

logger.info(f"Processing batch of {len(batch)} audio items")

# TODO: Implement batched processing through AI service manager
# This would be connected to the AI service for bulk processing
# Collect audio data for batch processing
audio_data_list = [item.get("audio_data") for item in batch]

# For now, process each item individually
for item in batch:
# Use AI service manager for bulk processing
try:
ai_service = get_unified_ai_service()
transcriptions = ai_service.batch_speech_to_text(audio_data_list)
except Exception as e:
logger.error(f"Failed to process batch with AI service: {e}")
transcriptions = ["Processing failed"] * len(batch)

# Process each item with its result
for i, item in enumerate(batch):
try:
callback = item.get("callback")
if callable(callback):
# Process the item and call the callback with results
audio_data = item.get("audio_data")
metadata = item.get("metadata", {})

# The callback function would need to handle the transcription
# Add transcription to metadata
result_text = transcriptions[i] if i < len(transcriptions) else "Processing error"
metadata["transcription"] = result_text

# The callback function handles the result
callback(audio_data, metadata)
except Exception as e:
logger.error(f"Error processing batch item: {str(e)}")
Expand Down
Loading