Time model loading during server start-up.

Each model load goes through a new `_timed_load` helper that logs, at debug
level, how long the load took; `start` also logs the total time for all
models. `_timed_load` only reports "loaded in" when the load succeeds: a load
that raises or is cancelled is logged as "failed to load after" and the
exception propagates unchanged.

NOTE(review): blank context lines were reconstructed from the hunk line
counts - confirm against the target file. The post-image blob id on the
`index` line predates the `_timed_load` change and was not recomputed.

diff --git a/mlserver/server.py b/mlserver/server.py
index d82c49080..b12bf237f 100644
--- a/mlserver/server.py
+++ b/mlserver/server.py
@@ -1,6 +1,7 @@
 import asyncio
 import signal
 import logging
+import time
 
 from typing import Optional, List
 
@@ -122,12 +123,17 @@ async def start(self, models_settings: List[ModelSettings] = []):
         servers_task = asyncio.gather(*servers)
 
         try:
+            total_start: float = time.perf_counter()
             await asyncio.gather(
                 *[
-                    self._model_registry.load(model_settings)
+                    self._timed_load(model_settings)
                     for model_settings in models_settings
                 ]
             )
+            total_duration: float = time.perf_counter() - total_start
+            logger.debug(
+                f"Loaded {len(models_settings)} models in {total_duration:0.3f}s"
+            )
         except Exception:
             # If one of the models failed to load during startup, shutdown the
             # server gracefully
@@ -188,3 +194,24 @@ async def stop(self, sig: Optional[int] = None):
 
         if self._metrics_server:
             await self._metrics_server.stop(sig)
+
+    async def _timed_load(self, model_settings: ModelSettings) -> MLModel:
+        """
+        Load a model and log, at debug level, how long the attempt took.
+
+        The message reads "loaded in" only when the load succeeds. If the
+        load raises or is cancelled, it reads "failed to load after" and
+        the exception propagates unchanged.
+        """
+        t0: float = time.perf_counter()
+        # Assume failure until the registry call returns, so the `finally`
+        # block never reports a failed load as a success.
+        outcome = "failed to load after"
+        try:
+            model = await self._model_registry.load(model_settings)
+            outcome = "loaded in"
+            return model
+        finally:
+            elapsed: float = time.perf_counter() - t0
+            model_name = getattr(model_settings, "name", "")
+            logger.debug(f"Model {model_name} {outcome} {elapsed:0.3f}s")