Fixing issue with torchserve not being available on startup (#229)

htrivedi99 · web-flow · commit 5e01087a8137 · 2024-03-04T13:53:53.000-08:00
diff --git a/whisper/whisper-torchserve/data/config.properties b/whisper/whisper-torchserve/data/config.properties
@@ -1,5 +1,5 @@
 inference_address=http://0.0.0.0:8888
-batch_size=4
+batch_size=16
 ipex_enable=true
 async_logging=true
 
@@ -9,12 +9,13 @@ models={\
         "defaultVersion": true,\
         "marName": "whisper_base.mar",\
         "minWorkers": 1,\
-        "maxWorkers": 2,\
-        "batchSize": 4,\
-        "maxBatchDelay": 500,\
-        "responseTimeout": 24\
+        "maxWorkers": 4,\
+        "batchSize": 16,\
+        "maxBatchDelay": 250,\
+        "responseTimeout": 120\
     }\
   }\
 }
 
+# maxBatchDelay is the maximum time (in ms) TorchServe waits for a batch to fill up to batchSize before running inference; it is set to 250 ms above.
 # default_workers_per_model=2
diff --git a/whisper/whisper-torchserve/model/model.py b/whisper/whisper-torchserve/model/model.py
@@ -9,12 +9,14 @@
 from huggingface_hub import snapshot_download
 
 TORCHSERVE_ENDPOINT = "http://0.0.0.0:8888/predictions/whisper_base"
+TORCHSERVE_HEALTH_ENDPOINT = "http://0.0.0.0:8888/ping"
 
 
 class Model:
     def __init__(self, **kwargs):
         self._data_dir = kwargs["data_dir"]
         self._model = None
+        self.torchserver_ready = False
 
     def start_tochserver(self):
         subprocess.run(
@@ -39,18 +41,29 @@ def load(self):
             local_dir=os.path.join(self._data_dir, "model_store"),
             max_workers=4,
         )
-        print("Downloaded weights succesfully!")
+        logging.info("⚡️ Weights Downloaded Successfully!")
 
         process = multiprocessing.Process(target=self.start_tochserver)
         process.start()
 
+        # Need to wait for the torchserve server to start up
+        while not self.torchserver_ready:
+            try:
+                res = requests.get(TORCHSERVE_HEALTH_ENDPOINT)
+                if res.status_code == 200:
+                    self.torchserver_ready = True
+                    logging.info("🔥Torchserve is ready!")
+            except Exception as e:
+                logging.info("⏳Torchserve is loading...")
+                time.sleep(5)
+
     async def predict(self, request: Dict):
         audio_base64 = request.get("audio")
         audio_bytes = base64.b64decode(audio_base64)
 
         async with httpx.AsyncClient() as client:
             res = await client.post(
-                TORCHSERVE_ENDPOINT, files={"data": (None, audio_bytes)}
+                TORCHSERVE_ENDPOINT, files={"data": (None, audio_bytes)}, timeout=120
             )
             transcription = res.text
         return {"output": transcription}