Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions .github/workflows/auto-unit-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,26 @@ jobs:
echo "✅ All tests passed successfully."
fi

- name: Upload coverage to Codecov
# Detect architecture
- name: Detect architecture
id: arch
run: echo "arch=$(uname -m)" >> $GITHUB_OUTPUT

# Use Python uploader on ARM
- name: Upload coverage to Codecov (Python uploader on ARM)
if: startsWith(steps.arch.outputs.arch, 'arm') || startsWith(steps.arch.outputs.arch, 'aarch64')
run: |
pip install --upgrade codecov
codecov \
-t ${{ secrets.CODECOV_TOKEN }} \
-f test/coverage.xml \
-F unittests \
-n codecov-umbrella \
-v

# Use official action on x86
- name: Upload coverage to Codecov (Official Action on x86)
if: steps.arch.outputs.arch == 'x86_64'
uses: codecov/codecov-action@v4
with:
files: test/coverage.xml
Expand All @@ -77,4 +96,4 @@ jobs:
name: codecov-umbrella
fail_ci_if_error: false
verbose: true
directory: .
directory: .
283 changes: 113 additions & 170 deletions backend/apps/voice_app.py
Original file line number Diff line number Diff line change
@@ -1,179 +1,122 @@
import asyncio
import logging
from http import HTTPStatus

from fastapi import APIRouter, WebSocket
from nexent.core.models.stt_model import STTConfig, STTModel
from nexent.core.models.tts_model import TTSConfig, TTSModel
from fastapi import APIRouter, WebSocket, HTTPException, Body, Query
from fastapi.responses import JSONResponse

from consts.const import APPID, CLUSTER, SPEED_RATIO, TEST_VOICE_PATH, TOKEN, VOICE_TYPE
from consts.exceptions import (
VoiceServiceException,
STTConnectionException,
TTSConnectionException,
VoiceConfigException
)
from consts.model import VoiceConnectivityRequest, VoiceConnectivityResponse
from services.voice_service import get_voice_service

logger = logging.getLogger("voice_app")


class VoiceService:
"""Unified voice service that hosts both STT and TTS on a single FastAPI application"""

def __init__(self):
"""
Initialize the voice service with configurations from const.py.
"""
# Initialize STT configuration
self.stt_config = STTConfig(
appid=APPID,
token=TOKEN
router = APIRouter(prefix="/voice")


@router.websocket("/stt/ws")
async def stt_websocket(websocket: WebSocket):
"""WebSocket endpoint for real-time audio streaming and STT"""
logger.info("STT WebSocket connection attempt...")
await websocket.accept()
logger.info("STT WebSocket connection accepted")

try:
voice_service = get_voice_service()
await voice_service.start_stt_streaming_session(websocket)
except STTConnectionException as e:
logger.error(f"STT WebSocket error: {str(e)}")
await websocket.send_json({"error": str(e)})
except Exception as e:
logger.error(f"STT WebSocket error: {str(e)}")
await websocket.send_json({"error": str(e)})
finally:
logger.info("STT WebSocket connection closed")


@router.websocket("/tts/ws")
async def tts_websocket(websocket: WebSocket):
"""WebSocket endpoint for streaming TTS"""
logger.info("TTS WebSocket connection attempt...")
await websocket.accept()
logger.info("TTS WebSocket connection accepted")

try:
# Receive text from client (single request)
data = await websocket.receive_json()
text = data.get("text")

if not text:
if websocket.client_state.name == "CONNECTED":
await websocket.send_json({"error": "No text provided"})
return

# Stream TTS audio to WebSocket
voice_service = get_voice_service()
await voice_service.stream_tts_to_websocket(websocket, text)

except TTSConnectionException as e:
logger.error(f"TTS WebSocket error: {str(e)}")
await websocket.send_json({"error": str(e)})
except Exception as e:
logger.error(f"TTS WebSocket error: {str(e)}")
await websocket.send_json({"error": str(e)})
finally:
logger.info("TTS WebSocket connection closed")
# Ensure connection is properly closed
if websocket.client_state.name == "CONNECTED":
await websocket.close()


@router.post("/connectivity")
async def check_voice_connectivity(request: VoiceConnectivityRequest):
"""
Check voice service connectivity

Args:
request: VoiceConnectivityRequest containing model_type

Returns:
VoiceConnectivityResponse with connectivity status
"""
try:
voice_service = get_voice_service()
connected = await voice_service.check_voice_connectivity(request.model_type)

return JSONResponse(
status_code=HTTPStatus.OK,
content=VoiceConnectivityResponse(
connected=connected,
model_type=request.model_type,
message="Service is connected" if connected else "Service connection failed"
).dict()
)

# Initialize TTS configuration
self.tts_config = TTSConfig(
appid=APPID,
token=TOKEN,
cluster=CLUSTER,
voice_type=VOICE_TYPE,
speed_ratio=SPEED_RATIO
except VoiceServiceException as e:
logger.error(f"Voice service error: {str(e)}")
raise HTTPException(
status_code=HTTPStatus.BAD_REQUEST,
detail=str(e)
)
except (STTConnectionException, TTSConnectionException) as e:
logger.error(f"Voice connectivity error: {str(e)}")
raise HTTPException(
status_code=HTTPStatus.SERVICE_UNAVAILABLE,
detail=str(e)
)
except VoiceConfigException as e:
logger.error(f"Voice configuration error: {str(e)}")
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
detail=str(e)
)
except Exception as e:
logger.error(f"Unexpected voice service error: {str(e)}")
raise HTTPException(
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
detail="Voice service error"
)

# Initialize models
self.stt_model = STTModel(self.stt_config, TEST_VOICE_PATH)
self.tts_model = TTSModel(self.tts_config)

# Create FastAPI application
self.router = APIRouter(prefix="/voice")

# Set up routes
self._setup_routes()

def _setup_routes(self):
"""Configure API routes for voice services"""

# STT WebSocket route
@self.router.websocket("/stt/ws")
async def stt_websocket(websocket: WebSocket):
"""WebSocket endpoint for real-time audio streaming and STT"""
logger.info("STT WebSocket connection attempt...")
await websocket.accept()
logger.info("STT WebSocket connection accepted")
try:
# Start streaming session
await self.stt_model.start_streaming_session(websocket)
except Exception as e:
logger.error(f"STT WebSocket error: {str(e)}")
import traceback
traceback.print_exc()
await websocket.send_json({"error": str(e)})
finally:
logger.info("STT WebSocket connection closed")

# TTS WebSocket route
@self.router.websocket("/tts/ws")
async def tts_websocket(websocket: WebSocket):
"""WebSocket endpoint for streaming TTS"""
logger.info("TTS WebSocket connection attempt...")
await websocket.accept()
logger.info("TTS WebSocket connection accepted")

try:
# Receive text from client (single request)
data = await websocket.receive_json()
text = data.get("text")

if not text:
if websocket.client_state.name == "CONNECTED":
await websocket.send_json({"error": "No text provided"})
return

# Generate and stream audio chunks
try:
# First try to use it as a coroutine that returns an async iterator
speech_result = await self.tts_model.generate_speech(text, stream=True)

# Check if it's an async iterator or a regular iterable
if hasattr(speech_result, '__aiter__'):
# It's an async iterator, use async for
async for chunk in speech_result:
if websocket.client_state.name == "CONNECTED":
await websocket.send_bytes(chunk)
else:
break
elif hasattr(speech_result, '__iter__'):
# It's a regular iterator, use normal for
for chunk in speech_result:
if websocket.client_state.name == "CONNECTED":
await websocket.send_bytes(chunk)
else:
break
else:
# It's a single chunk, send it directly
if websocket.client_state.name == "CONNECTED":
await websocket.send_bytes(speech_result)

await asyncio.sleep(0.1)

except TypeError as te:
# If speech_result is still a coroutine, try calling it directly without stream=True
if "async for" in str(te) and "requires an object with __aiter__" in str(te):
logger.error("Falling back to non-streaming TTS")
speech_data = await self.tts_model.generate_speech(text, stream=False)
if websocket.client_state.name == "CONNECTED":
await websocket.send_bytes(speech_data)
else:
raise

# Send end marker after successful TTS generation
if websocket.client_state.name == "CONNECTED":
await websocket.send_json({"status": "completed"})

except Exception as e:
logger.error(f"TTS WebSocket error: {str(e)}")
import traceback
traceback.print_exc()
await websocket.send_json({"error": str(e)})
finally:
logger.info("TTS WebSocket connection closed")
# Ensure connection is properly closed
if websocket.client_state.name == "CONNECTED":
await websocket.close()

async def check_connectivity(self, model_type: str) -> bool:
"""
Check the connectivity status of voice services (STT and TTS)

Args:
model_type: The type of model to check, options are 'stt', 'tts'

Returns:
bool: Returns True if all services are connected normally, False if any service connection fails
"""
try:
stt_connected = False
tts_connected = False

if model_type == 'stt':
logging.info(f'STT Config: {self.stt_config}')
stt_connected = await self.stt_model.check_connectivity()
if not stt_connected:
logging.error(
"Speech Recognition (STT) service connection failed")

if model_type == 'tts':
logging.info(f'TTS Config: {self.tts_config}')
tts_connected = await self.tts_model.check_connectivity()
if not tts_connected:
logging.error(
"Text-to-Speech (TTS) service connection failed")

# Return the corresponding connection status based on model_type
if model_type == 'stt':
return stt_connected
elif model_type == 'tts':
return tts_connected
else:
logging.error(f"Unknown model type: {model_type}")
return False

except Exception as e:
logging.error(
f"Voice service connectivity test encountered an exception: {str(e)}")
return False


router = VoiceService().router
20 changes: 20 additions & 0 deletions backend/consts/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,24 @@ class NotFoundException(Exception):

class MEConnectionException(Exception):
"""Raised when not found exception occurs."""
pass


class VoiceServiceException(Exception):
"""Raised when voice service fails."""
pass


class STTConnectionException(Exception):
"""Raised when STT service connection fails."""
pass


class TTSConnectionException(Exception):
"""Raised when TTS service connection fails."""
pass


class VoiceConfigException(Exception):
"""Raised when voice configuration is invalid."""
pass
26 changes: 26 additions & 0 deletions backend/consts/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,3 +314,29 @@ class MemoryAgentShareMode(str, Enum):
@classmethod
def default(cls) -> "MemoryAgentShareMode":
return cls.NEVER


# Voice Service Data Models
# ---------------------------------------------------------------------------
class VoiceConnectivityRequest(BaseModel):
"""Request model for voice service connectivity check"""
model_type: str = Field(..., description="Type of model to check ('stt' or 'tts')")


class VoiceConnectivityResponse(BaseModel):
"""Response model for voice service connectivity check"""
connected: bool = Field(..., description="Whether the service is connected")
model_type: str = Field(..., description="Type of model checked")
message: str = Field(..., description="Status message")


class TTSRequest(BaseModel):
"""Request model for TTS text-to-speech conversion"""
text: str = Field(..., min_length=1, description="Text to convert to speech")
stream: bool = Field(True, description="Whether to stream the audio")


class TTSResponse(BaseModel):
"""Response model for TTS conversion"""
status: str = Field(..., description="Status of the TTS conversion")
message: Optional[str] = Field(None, description="Additional message")
Loading