basetenlabs
diff --git a/‎xtts-streaming/model/model.py‎
Lines changed: 22 additions & 55 deletions b/‎xtts-streaming/model/model.py‎
Lines changed: 22 additions & 55 deletions
diff --git a/‎xtts-streaming/test.py‎
Lines changed: 0 additions & 168 deletions b/‎xtts-streaming/test.py‎
Lines changed: 0 additions & 168 deletions
@@ -4,15 +4,13 @@
 import os
 import time
 import wave
-import json
 
 import numpy as np
 import torch
 from TTS.tts.configs.xtts_config import XttsConfig
 from TTS.tts.models.xtts import Xtts
 from TTS.utils.generic_utils import get_user_data_dir
 from TTS.utils.manage import ModelManager
-import fastapi
 
 # This is one of the speaker voices that comes with xtts
 SPEAKER_NAME = "Claribel Dervla"
@@ -35,10 +33,12 @@ def load(self):
         config = XttsConfig()
         config.load_json(os.path.join(model_path, "config.json"))
         self.model = Xtts.init_from_config(config)
+        # self.model.load_checkpoint(config, checkpoint_dir=model_path, eval=True)
         self.model.load_checkpoint(
             config, checkpoint_dir=model_path, eval=True, use_deepspeed=True
         )
         self.model.to(device)
+        # self.compiled_model = torch.compile(self.model.inference_stream)
 
         self.speaker = {
             "speaker_embedding": self.model.speaker_manager.speakers[SPEAKER_NAME][
@@ -78,58 +78,25 @@ def wav_postprocess(self, wav):
         wav = (wav * 32767).astype(np.int16)
         return wav
 
+    def predict(self, model_input):  # generator: yields one bytes object per synthesized audio chunk
+        text = model_input.get("text")  # NOTE(review): None when "text" is absent; not validated before use
+        language = model_input.get("language", "en")  # falls back to "en" when omitted
+        chunk_size = int(
+            model_input.get("chunk_size", 20)
+        )  # Ensure chunk_size is an integer
+        add_wav_header = False  # NOTE(review): assigned but never read in this method
 
-    async def websocket(self, websocket: fastapi.WebSocket):
-        """Handle WebSocket connections for text-to-speech requests"""
-        print("WebSocket connected")
-        try:
-            while True:
-                data = await websocket.receive_text()
-                
-                try:
-                    # Parse JSON input if provided
-                    input_data = json.loads(data)
-                except json.JSONDecodeError:
-                    # If not JSON, assume it's just text
-                    input_data = {"text": data, "language": "en", "chunk_size": 20}
-                
-                text = input_data.get("text")
-                language = input_data.get("language", "en")
-                chunk_size = int(input_data.get("chunk_size", 20))
-                
-                # Process the text to speech using the logic from the original predict method
-                streamer = self.model.inference_stream(
-                    text,
-                    language,
-                    self.gpt_cond_latent,
-                    self.speaker_embedding,
-                    stream_chunk_size=chunk_size,
-                    enable_text_splitting=True,
-                    temperature=0.2,
-                )
+        streamer = self.model.inference_stream(
+            text,
+            language,
+            self.gpt_cond_latent,
+            self.speaker_embedding,
+            stream_chunk_size=chunk_size,
+            enable_text_splitting=True,
+            temperature=0.2,
+        )
 
-                for chunk in streamer:
-                    processed_chunk = self.wav_postprocess(chunk)
-                    processed_bytes = processed_chunk.tobytes()
-                    encoded_chunk = base64.b64encode(processed_bytes).decode('utf-8')
-                    await websocket.send_json({
-                        "type": "chunk",
-                        "data": encoded_chunk
-                    })
-                
-                await websocket.send_json({
-                    "type": "complete",
-                    "message": f"Processed '{text}'"
-                })
-                
-        except fastapi.WebSocketDisconnect:
-            print("WebSocket disconnected")
-        except Exception as e:
-            print(f"WebSocket error: {str(e)}")
-            try:
-                await websocket.send_json({
-                    "type": "error",
-                    "message": str(e)
-                })
-            except:
-                pass
+        for chunk in streamer:
+            processed_chunk = self.wav_postprocess(chunk)  # wav_postprocess scales by 32767 and casts to int16
+            processed_bytes = processed_chunk.tobytes()  # raw sample bytes; no WAV header is prepended here
+            yield processed_bytes