feat: add s2s blocks

Prajna1999 · Prajna1999 · commit c1807df44cd2 · 2026-03-06T12:40:34.000+05:30
diff --git a/backend/app/api/docs/llm/speech_to_speech.md b/backend/app/api/docs/llm/speech_to_speech.md
@@ -62,7 +62,7 @@ Each callback includes:
 - `gpt-4o-mini` - OpenAI GPT-4o Mini (faster, lower cost)
 
 ### TTS (Text-to-Speech)
-- `bulbul-v3` - Sarvam Bulbul V3 (**default**, natural Indian voices, MP3 output)
+- `bulbul:v3` - Sarvam Bulbul V3 (**default**, natural Indian voices, MP3 output)
 - `gemini-2.5-pro-preview-tts` - Google Gemini 2.5 Pro (OGG OPUS output)
 
 ## Edge Cases & Error Handling
@@ -99,7 +99,7 @@ curl -X POST https://api.kaapi.ai/llm/sts \
   -H "Content-Type: application/json" \
   -d @- <<EOF
 {
-  "audio": {
+  "query": {
     "type": "audio",
     "content": {
       "format": "base64",
diff --git a/backend/app/api/routes/llm_speech.py b/backend/app/api/routes/llm_speech.py
@@ -116,7 +116,7 @@ def speech_to_speech(
 
     # Create chain request
     chain_request = LLMChainRequest(
-        query=QueryParams(input=request.audio),
+        query=QueryParams(input=request.query),
         blocks=blocks,
         callback_url=request.callback_url,
         request_metadata=metadata,
diff --git a/backend/app/api/routes/llm_speech_examples.md b/backend/app/api/routes/llm_speech_examples.md
@@ -14,7 +14,7 @@ curl -X POST https://api.kaapi.ai/llm/sts \
   -H "Authorization: Bearer YOUR_API_KEY" \
   -H "Content-Type: application/json" \
   -d '{
-    "audio": {
+    "query": {
       "type": "audio",
       "content": {
         "format": "base64",
@@ -41,7 +41,7 @@ curl -X POST https://api.kaapi.ai/llm/sts \
 ### Request with All Options
 ```json
 {
-  "audio": {
+  "query": {
     "type": "audio",
     "content": {
       "format": "base64",
@@ -280,7 +280,7 @@ If specified knowledge base doesn't exist:
 ### English → English
 ```json
 {
-  "audio": {...},
+  "query": {...},
   "knowledge_base_ids": ["kb_123"],
   "input_language": "english",
   "output_language": "english",
@@ -291,7 +291,7 @@ If specified knowledge base doesn't exist:
 ### Hindi → English (Translation)
 ```json
 {
-  "audio": {...},
+  "query": {...},
   "knowledge_base_ids": ["kb_123"],
   "input_language": "hindi",
   "output_language": "english",
@@ -302,7 +302,7 @@ If specified knowledge base doesn't exist:
 ### Hinglish (Code-Switching)
 ```json
 {
-  "audio": {...},
+  "query": {...},
   "knowledge_base_ids": ["kb_123"],
   "input_language": "hinglish",
   "output_language": "hinglish",
@@ -314,7 +314,7 @@ If specified knowledge base doesn't exist:
 ### Regional Indian Languages
 ```json
 {
-  "audio": {...},
+  "query": {...},
   "knowledge_base_ids": ["kb_123"],
   "input_language": "auto",  // Auto-detect
   "output_language": "odia",  // Odia, Bengali, Punjabi, etc.
@@ -386,7 +386,7 @@ def handle_whatsapp_voice_message(audio_url, user_id):
         "https://api.kaapi.ai/llm/sts",
         headers={"Authorization": f"Bearer {API_KEY}"},
         json={
-            "audio": {
+            "query": {
                 "type": "audio",
                 "content": {
                     "format": "base64",
diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py
@@ -792,7 +792,7 @@ class SpeechToSpeechRequest(SQLModel):
     Output: Audio + Text (via callback)
     """
 
-    audio: AudioInput = Field(
+    query: AudioInput = Field(
         ..., description="Voice note input (WhatsApp compatible format)"
     )
     knowledge_base_ids: list[str] = Field(
diff --git a/backend/app/services/llm/chain/utils.py b/backend/app/services/llm/chain/utils.py
@@ -94,7 +94,7 @@ def build_stt_block(model: STTModel, language_code: str) -> ChainBlock:
         params["language_code"] = (
             language_code if language_code != "unknown" else "unknown"
         )
-        params["mode"] = "transcription"
+        params["mode"] = "transcribe"
     elif provider == "google-native":
         # Google requires specific language code, fallback to en-IN if unknown
         params["language_code"] = (
@@ -146,7 +146,7 @@ def build_rag_block(model: LLMModel, knowledge_base_ids: list[str]) -> ChainBloc
     )
 
 
-def build_tts_block(model: TTSModel, language_code: str) -> ChainBlock:
+def build_tts_block(model: TTSModel, language_code: str = "en-IN") -> ChainBlock:
     """Build TTS (Text-to-Speech) block configuration.
 
     Args:
@@ -175,7 +175,7 @@ def build_tts_block(model: TTSModel, language_code: str) -> ChainBlock:
 
     # Add provider-specific parameters
     if provider == "sarvamai-native":
-        params["target_language_code"] = language_code
+        params["target_language_code"] = "en-IN"
         params["speaker"] = voice
         params["output_audio_codec"] = "mp3"  # WhatsApp compatible
     elif provider == "google-native":
diff --git a/backend/test_sts_debug.py b/backend/test_sts_debug.py
@@ -0,0 +1,193 @@
+"""Debug script for STS endpoint and chain job execution."""
+
+import logging
+import sys
+from sqlmodel import Session
+
+# Setup logging
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+def test_chain_job_creation():
+    """Build a one-block STT chain request and queue it via ``start_chain_job``.
+
+    Returns:
+        The job ID from ``start_chain_job``, or ``None`` if queuing raised.
+    """
+    import traceback
+
+    from app.core.db import engine
+    from app.models.llm.request import (
+        LLMChainRequest,
+        QueryParams,
+        AudioInput,
+        AudioContent,
+        ChainBlock,
+        LLMCallConfig,
+        ConfigBlob,
+        NativeCompletionConfig,
+    )
+    from app.services.llm.jobs import start_chain_job
+
+    print("\n" + "=" * 80 + "\nSTEP 1: Creating test chain request\n" + "=" * 80)
+
+    # Create a minimal valid chain request
+    test_request = LLMChainRequest(
+        query=QueryParams(
+            input=AudioInput(
+                type="audio",
+                content=AudioContent(
+                    format="base64",
+                    value="dGVzdF9hdWRpbw==",  # base64 encoded "test_audio"
+                    mime_type="audio/ogg",
+                ),
+            )
+        ),
+        blocks=[
+            ChainBlock(
+                config=LLMCallConfig(
+                    blob=ConfigBlob(
+                        completion=NativeCompletionConfig(
+                            provider="sarvamai-native",
+                            type="stt",
+                            params={
+                                "model": "saarika:v1",
+                                "language_code": "unknown",
+                                "mode": "transcribe",  # value build_stt_block sets
+                            },
+                        )
+                    )
+                ),
+                intermediate_callback=True,
+            )
+        ],
+    )
+
+    print(f"✅ Test request created with {len(test_request.blocks)} block(s)")
+
+    print("\n" + "=" * 80 + "\nSTEP 2: Attempting to start chain job\n" + "=" * 80)
+
+    try:
+        with Session(engine) as session:
+            job_id = start_chain_job(
+                db=session,
+                request=test_request,
+                project_id=1,  # Use test project ID
+                organization_id=1,  # Use test org ID
+            )
+            print("✅ Chain job created successfully!")
+            print(f"   Job ID: {job_id}")
+            print("   Check your Celery worker logs for task execution")
+            return job_id
+    except Exception as e:
+        print(f"❌ Failed to create chain job: {e}")
+        traceback.print_exc()
+        return None
+
+
+def check_celery_connection():
+    """Report active Celery workers and their chain/high-priority tasks.
+
+    Prints what ``inspect().active()`` and ``inspect().registered()`` return;
+    an exception raised while inspecting is printed with its traceback.
+    """
+    print("\n" + "=" * 80 + "\nSTEP 3: Checking Celery connection\n" + "=" * 80)
+
+    try:
+        from app.celery.celery_app import celery_app
+
+        inspect_api = celery_app.control.inspect()
+        # A falsy active() reply is reported as "no workers".
+        workers = inspect_api.active()
+        if not workers:
+            print("⚠️  No active Celery workers found!")
+            print("   Make sure to start the Celery worker with:")
+            print("   celery -A app.celery.celery_app worker --loglevel=info")
+        else:
+            print("✅ Celery workers are running:")
+            for name, running in workers.items():
+                print(f"   - {name}: {len(running)} active tasks")
+
+        # Only task names containing "high_priority" or "chain" are listed.
+        task_map = inspect_api.registered()
+        if task_map:
+            print("\n✅ Registered tasks:")
+            for name, task_names in task_map.items():
+                print(f"   Worker: {name}")
+                for task_name in sorted(task_names):
+                    if "high_priority" in task_name or "chain" in task_name.lower():
+                        print(f"      - {task_name}")
+    except Exception as exc:
+        print(f"❌ Failed to check Celery: {exc}")
+        import traceback
+
+        traceback.print_exc()
+
+
+def check_function_import():
+    """Print whether ``execute_chain_job`` imports directly and via ``importlib``."""
+    print("\n" + "=" * 80 + "\nSTEP 4: Verifying execute_chain_job import\n" + "=" * 80)
+
+    try:
+        import importlib
+        import inspect
+
+        from app.services.llm.jobs import execute_chain_job
+
+        print("✅ execute_chain_job is importable")
+        # signature() yields only declared parameters, never body locals.
+        param_names = tuple(inspect.signature(execute_chain_job).parameters)
+        print(f"   Parameters: {param_names}")
+
+        # Resolve it by dotted module path as well; getattr raises if it is absent.
+        module = importlib.import_module("app.services.llm.jobs")
+        getattr(module, "execute_chain_job")
+        print("✅ Dynamic import successful (same as Celery)")
+    except Exception as e:
+        print(f"❌ Import failed: {e}")
+        import traceback
+
+        traceback.print_exc()
+
+
+if __name__ == "__main__":
+    # Entry point: run the import and Celery checks, queue a test job, then
+    # print a summary with follow-up hints.
+    banner = "=" * 80
+    print("\n" + banner)
+    print("STS ENDPOINT DEBUG SCRIPT")
+    print(banner)
+
+    check_function_import()
+    check_celery_connection()
+    job_id = test_chain_job_creation()
+
+    # The summary header is the same for both outcomes.
+    print("\n" + banner)
+    print("DEBUGGING SUMMARY")
+    print(banner)
+    if not job_id:
+        print("❌ Failed to queue chain job")
+        print("   Check the error messages above for details")
+    else:
+        task_path = "app.celery.tasks.job_execution.execute_high_priority_task"
+        status_sql = f"SELECT * FROM job WHERE id = '{job_id}';"
+
+        print(f"✅ Chain job was queued successfully: {job_id}")
+        print("\nNext steps:")
+        print("1. Check your Celery worker logs for:")
+        print(f"   - Task {task_path} received")
+        print(f"   - Executing high_priority job {job_id}")
+        print("   - Function path: app.services.llm.jobs.execute_chain_job")
+        print("\n2. If you don't see the task in worker logs:")
+        print("   - Verify Celery broker (RabbitMQ/Redis) is running")
+        print("   - Check broker connection in Celery worker startup logs")
+        print("   - Restart Celery worker")
+        print("\n3. If task starts but fails:")
+        print("   - Look for error in Celery worker logs")
+        print(f"   - Check database for job status: {status_sql}")
+
+    print(banner + "\n")

Original file line number	Diff line number	Diff line change
`@@ -792,7 +792,7 @@ class SpeechToSpeechRequest(SQLModel):`
`792`	`792`	`Output: Audio + Text (via callback)`
`793`	`793`	`"""`
`794`	`794`
`795`		`- audio: AudioInput = Field(`
	`795`	`+ query: AudioInput = Field(`
`796`	`796`	`..., description="Voice note input (WhatsApp compatible format)"`
`797`	`797`	`)`
`798`	`798`	`knowledge_base_ids: list[str] = Field(`