Skip to content

Commit c1807df

Browse files
committed
feat: add s2s blocks
1 parent 5b9a4e9 commit c1807df

File tree

6 files changed

+207
-14
lines changed

6 files changed

+207
-14
lines changed

backend/app/api/docs/llm/speech_to_speech.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ Each callback includes:
6262
- `gpt-4o-mini` - OpenAI GPT-4o Mini (faster, lower cost)
6363

6464
### TTS (Text-to-Speech)
65-
- `bulbul-v3` - Sarvam Bulbul V3 (**default**, natural Indian voices, MP3 output)
65+
- `bulbul:v3` - Sarvam Bulbul V3 (**default**, natural Indian voices, MP3 output)
6666
- `gemini-2.5-pro-preview-tts` - Google Gemini 2.5 Pro (OGG OPUS output)
6767

6868
## Edge Cases & Error Handling
@@ -99,7 +99,7 @@ curl -X POST https://api.kaapi.ai/llm/sts \
9999
-H "Content-Type: application/json" \
100100
-d @- <<EOF
101101
{
102-
"audio": {
102+
"query": {
103103
"type": "audio",
104104
"content": {
105105
"format": "base64",

backend/app/api/routes/llm_speech.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def speech_to_speech(
116116

117117
# Create chain request
118118
chain_request = LLMChainRequest(
119-
query=QueryParams(input=request.audio),
119+
query=QueryParams(input=request.query),
120120
blocks=blocks,
121121
callback_url=request.callback_url,
122122
request_metadata=metadata,

backend/app/api/routes/llm_speech_examples.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ curl -X POST https://api.kaapi.ai/llm/sts \
1414
-H "Authorization: Bearer YOUR_API_KEY" \
1515
-H "Content-Type: application/json" \
1616
-d '{
17-
"audio": {
17+
"query": {
1818
"type": "audio",
1919
"content": {
2020
"format": "base64",
@@ -41,7 +41,7 @@ curl -X POST https://api.kaapi.ai/llm/sts \
4141
### Request with All Options
4242
```json
4343
{
44-
"audio": {
44+
"query": {
4545
"type": "audio",
4646
"content": {
4747
"format": "base64",
@@ -280,7 +280,7 @@ If specified knowledge base doesn't exist:
280280
### English → English
281281
```json
282282
{
283-
"audio": {...},
283+
"query": {...},
284284
"knowledge_base_ids": ["kb_123"],
285285
"input_language": "english",
286286
"output_language": "english",
@@ -291,7 +291,7 @@ If specified knowledge base doesn't exist:
291291
### Hindi → English (Translation)
292292
```json
293293
{
294-
"audio": {...},
294+
"query": {...},
295295
"knowledge_base_ids": ["kb_123"],
296296
"input_language": "hindi",
297297
"output_language": "english",
@@ -302,7 +302,7 @@ If specified knowledge base doesn't exist:
302302
### Hinglish (Code-Switching)
303303
```json
304304
{
305-
"audio": {...},
305+
"query": {...},
306306
"knowledge_base_ids": ["kb_123"],
307307
"input_language": "hinglish",
308308
"output_language": "hinglish",
@@ -314,7 +314,7 @@ If specified knowledge base doesn't exist:
314314
### Regional Indian Languages
315315
```json
316316
{
317-
"audio": {...},
317+
"query": {...},
318318
"knowledge_base_ids": ["kb_123"],
319319
"input_language": "auto", // Auto-detect
320320
"output_language": "odia", // Odia, Bengali, Punjabi, etc.
@@ -386,7 +386,7 @@ def handle_whatsapp_voice_message(audio_url, user_id):
386386
"https://api.kaapi.ai/llm/sts",
387387
headers={"Authorization": f"Bearer {API_KEY}"},
388388
json={
389-
"audio": {
389+
"query": {
390390
"type": "audio",
391391
"content": {
392392
"format": "base64",

backend/app/models/llm/request.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -792,7 +792,7 @@ class SpeechToSpeechRequest(SQLModel):
792792
Output: Audio + Text (via callback)
793793
"""
794794

795-
audio: AudioInput = Field(
795+
query: AudioInput = Field(
796796
..., description="Voice note input (WhatsApp compatible format)"
797797
)
798798
knowledge_base_ids: list[str] = Field(

backend/app/services/llm/chain/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def build_stt_block(model: STTModel, language_code: str) -> ChainBlock:
9494
params["language_code"] = (
9595
language_code if language_code != "unknown" else "unknown"
9696
)
97-
params["mode"] = "transcription"
97+
params["mode"] = "transcribe"
9898
elif provider == "google-native":
9999
# Google requires specific language code, fallback to en-IN if unknown
100100
params["language_code"] = (
@@ -146,7 +146,7 @@ def build_rag_block(model: LLMModel, knowledge_base_ids: list[str]) -> ChainBloc
146146
)
147147

148148

149-
def build_tts_block(model: TTSModel, language_code: str) -> ChainBlock:
149+
def build_tts_block(model: TTSModel, language_code: str = "en-IN") -> ChainBlock:
150150
"""Build TTS (Text-to-Speech) block configuration.
151151
152152
Args:
@@ -175,7 +175,7 @@ def build_tts_block(model: TTSModel, language_code: str) -> ChainBlock:
175175

176176
# Add provider-specific parameters
177177
if provider == "sarvamai-native":
178-
params["target_language_code"] = language_code
178+
params["target_language_code"] = "en-IN"
179179
params["speaker"] = voice
180180
params["output_audio_codec"] = "mp3" # WhatsApp compatible
181181
elif provider == "google-native":

backend/test_sts_debug.py

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
"""Debug script for STS endpoint and chain job execution."""
2+
3+
import logging
4+
import sys
5+
from sqlmodel import Session
6+
7+
# Setup logging
8+
logging.basicConfig(
9+
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
10+
)
11+
logger = logging.getLogger(__name__)
12+
13+
14+
def test_chain_job_creation():
15+
"""Test if chain job can be created and queued."""
16+
from app.core.db import engine
17+
from app.models.llm.request import (
18+
LLMChainRequest,
19+
QueryParams,
20+
AudioInput,
21+
AudioContent,
22+
ChainBlock,
23+
LLMCallConfig,
24+
ConfigBlob,
25+
NativeCompletionConfig,
26+
)
27+
from app.services.llm.jobs import start_chain_job
28+
29+
print("\n" + "=" * 80)
30+
print("STEP 1: Creating test chain request")
31+
print("=" * 80)
32+
33+
# Create a minimal valid chain request
34+
test_request = LLMChainRequest(
35+
query=QueryParams(
36+
input=AudioInput(
37+
type="audio",
38+
content=AudioContent(
39+
format="base64",
40+
value="dGVzdF9hdWRpbw==", # base64 encoded "test_audio"
41+
mime_type="audio/ogg",
42+
),
43+
)
44+
),
45+
blocks=[
46+
ChainBlock(
47+
config=LLMCallConfig(
48+
blob=ConfigBlob(
49+
completion=NativeCompletionConfig(
50+
provider="sarvamai-native",
51+
type="stt",
52+
params={
53+
"model": "saarika:v1",
54+
"language_code": "unknown",
55+
"mode": "transcription",
56+
},
57+
)
58+
)
59+
),
60+
intermediate_callback=True,
61+
)
62+
],
63+
)
64+
65+
print(f"✅ Test request created with {len(test_request.blocks)} block(s)")
66+
67+
print("\n" + "=" * 80)
68+
print("STEP 2: Attempting to start chain job")
69+
print("=" * 80)
70+
71+
try:
72+
with Session(engine) as session:
73+
job_id = start_chain_job(
74+
db=session,
75+
request=test_request,
76+
project_id=1, # Use test project ID
77+
organization_id=1, # Use test org ID
78+
)
79+
print(f"✅ Chain job created successfully!")
80+
print(f" Job ID: {job_id}")
81+
print(f" Check your Celery worker logs for task execution")
82+
return job_id
83+
except Exception as e:
84+
print(f"❌ Failed to create chain job: {e}")
85+
import traceback
86+
87+
traceback.print_exc()
88+
return None
89+
90+
91+
def check_celery_connection():
92+
"""Check if Celery is running and can receive tasks."""
93+
print("\n" + "=" * 80)
94+
print("STEP 3: Checking Celery connection")
95+
print("=" * 80)
96+
97+
try:
98+
from app.celery.celery_app import celery_app
99+
100+
# Check if broker is reachable
101+
inspector = celery_app.control.inspect()
102+
active_workers = inspector.active()
103+
104+
if active_workers:
105+
print(f"✅ Celery workers are running:")
106+
for worker_name, tasks in active_workers.items():
107+
print(f" - {worker_name}: {len(tasks)} active tasks")
108+
else:
109+
print("⚠️ No active Celery workers found!")
110+
print(" Make sure to start the Celery worker with:")
111+
print(" celery -A app.celery.celery_app worker --loglevel=info")
112+
113+
# Check registered tasks
114+
registered = inspector.registered()
115+
if registered:
116+
print(f"\n✅ Registered tasks:")
117+
for worker_name, tasks in registered.items():
118+
print(f" Worker: {worker_name}")
119+
for task in sorted(tasks):
120+
if "high_priority" in task or "chain" in task.lower():
121+
print(f" - {task}")
122+
123+
except Exception as e:
124+
print(f"❌ Failed to check Celery: {e}")
125+
import traceback
126+
127+
traceback.print_exc()
128+
129+
130+
def check_function_import():
131+
"""Verify execute_chain_job can be imported."""
132+
print("\n" + "=" * 80)
133+
print("STEP 4: Verifying execute_chain_job import")
134+
print("=" * 80)
135+
136+
try:
137+
from app.services.llm.jobs import execute_chain_job
138+
139+
print(f"✅ execute_chain_job is importable")
140+
print(f" Parameters: {execute_chain_job.__code__.co_varnames[:6]}")
141+
142+
# Try dynamic import (same way Celery does it)
143+
import importlib
144+
145+
module = importlib.import_module("app.services.llm.jobs")
146+
func = getattr(module, "execute_chain_job")
147+
print(f"✅ Dynamic import successful (same as Celery)")
148+
149+
except Exception as e:
150+
print(f"❌ Import failed: {e}")
151+
import traceback
152+
153+
traceback.print_exc()
154+
155+
156+
if __name__ == "__main__":
157+
print("\n" + "=" * 80)
158+
print("STS ENDPOINT DEBUG SCRIPT")
159+
print("=" * 80)
160+
161+
check_function_import()
162+
check_celery_connection()
163+
job_id = test_chain_job_creation()
164+
165+
if job_id:
166+
print("\n" + "=" * 80)
167+
print("DEBUGGING SUMMARY")
168+
print("=" * 80)
169+
print(f"✅ Chain job was queued successfully: {job_id}")
170+
print(f"\nNext steps:")
171+
print(f"1. Check your Celery worker logs for:")
172+
print(
173+
f" - Task app.celery.tasks.job_execution.execute_high_priority_task received"
174+
)
175+
print(f" - Executing high_priority job {job_id}")
176+
print(f" - Function path: app.services.llm.jobs.execute_chain_job")
177+
print(f"\n2. If you don't see the task in worker logs:")
178+
print(f" - Verify Celery broker (RabbitMQ/Redis) is running")
179+
print(f" - Check broker connection in Celery worker startup logs")
180+
print(f" - Restart Celery worker")
181+
print(f"\n3. If task starts but fails:")
182+
print(f" - Look for error in Celery worker logs")
183+
print(
184+
f" - Check database for job status: SELECT * FROM job WHERE id = '{job_id}';"
185+
)
186+
else:
187+
print("\n" + "=" * 80)
188+
print("DEBUGGING SUMMARY")
189+
print("=" * 80)
190+
print(f"❌ Failed to queue chain job")
191+
print(f" Check the error messages above for details")
192+
193+
print("=" * 80 + "\n")

0 commit comments

Comments
 (0)