Skip to content

Commit 5e01087

Browse files
authored
Fixing issue with torchserve not being available on startup (#229)
1 parent 0487fe9 commit 5e01087

File tree

2 files changed

+21
-7
lines changed

2 files changed

+21
-7
lines changed
Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
inference_address=http://0.0.0.0:8888
2-
batch_size=4
2+
batch_size=16
33
ipex_enable=true
44
async_logging=true
55

@@ -9,12 +9,13 @@ models={\
99
"defaultVersion": true,\
1010
"marName": "whisper_base.mar",\
1111
"minWorkers": 1,\
12-
"maxWorkers": 2,\
13-
"batchSize": 4,\
14-
"maxBatchDelay": 500,\
15-
"responseTimeout": 24\
12+
"maxWorkers": 4,\
13+
"batchSize": 16,\
14+
"maxBatchDelay": 250,\
15+
"responseTimeout": 120\
1616
}\
1717
}\
1818
}
1919

20+
# maxBatchDelay is the maximum time (in ms) TorchServe waits for a batch to fill up to batchSize before running inference on whatever has arrived. It is set to 250 ms above.
2021
# default_workers_per_model=2

whisper/whisper-torchserve/model/model.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
from huggingface_hub import snapshot_download
1010

1111
TORCHSERVE_ENDPOINT = "http://0.0.0.0:8888/predictions/whisper_base"
12+
TORCHSERVE_HEALTH_ENDPOINT = "http://0.0.0.0:8888/ping"
1213

1314

1415
class Model:
1516
def __init__(self, **kwargs):
1617
self._data_dir = kwargs["data_dir"]
1718
self._model = None
19+
self.torchserver_ready = False
1820

1921
def start_tochserver(self):
2022
subprocess.run(
@@ -39,18 +41,29 @@ def load(self):
3941
local_dir=os.path.join(self._data_dir, "model_store"),
4042
max_workers=4,
4143
)
42-
print("Downloaded weights succesfully!")
44+
logging.info("⚡️ Weights Downloaded Successfully!")
4345

4446
process = multiprocessing.Process(target=self.start_tochserver)
4547
process.start()
4648

49+
# Need to wait for the torchserve server to start up
50+
while not self.torchserver_ready:
51+
try:
52+
res = requests.get(TORCHSERVE_HEALTH_ENDPOINT)
53+
if res.status_code == 200:
54+
self.torchserver_ready = True
55+
logging.info("🔥Torchserve is ready!")
56+
except Exception as e:
57+
logging.info("⏳Torchserve is loading...")
58+
time.sleep(5)
59+
4760
async def predict(self, request: Dict):
    """Transcribe base64-encoded audio by forwarding it to TorchServe.

    Args:
        request: Mapping with an ``"audio"`` entry holding the
            base64-encoded audio payload.

    Returns:
        ``{"output": text}`` where ``text`` is the body of the TorchServe
        response.

    Raises:
        ValueError: If ``request`` has no ``"audio"`` entry.
        httpx.HTTPStatusError: If TorchServe answers with a 4xx/5xx status.
    """
    audio_base64 = request.get("audio")
    if audio_base64 is None:
        # Fail with a clear message instead of the opaque TypeError that
        # base64.b64decode(None) would raise.
        raise ValueError('request is missing the required "audio" field')
    audio_bytes = base64.b64decode(audio_base64)

    async with httpx.AsyncClient() as client:
        res = await client.post(
            TORCHSERVE_ENDPOINT,
            files={"data": (None, audio_bytes)},
            # Explicit 120 s limit, mirroring the 120 s responseTimeout set
            # for the model in the TorchServe config.
            timeout=120,
        )
        # Surface TorchServe failures instead of returning an error body as
        # if it were a transcription.
        res.raise_for_status()
        transcription = res.text
    return {"output": transcription}

0 commit comments

Comments
 (0)