Add warm-up to onnx as some GPUs require kernel compilation before accepting inferences (#22685)

NickM-27 · web-flow · commit 29ca18c24cc0 · 2026-03-29T11:19:46.000-05:00
diff --git a/frigate/detectors/plugins/onnx.py b/frigate/detectors/plugins/onnx.py
@@ -8,6 +8,8 @@
 from frigate.detectors.detection_runners import get_optimized_runner
 from frigate.detectors.detector_config import (
     BaseDetectorConfig,
+    InputDTypeEnum,
+    InputTensorEnum,
     ModelTypeEnum,
 )
 from frigate.util.model import (
@@ -59,8 +61,34 @@ def __init__(self, detector_config: ONNXDetectorConfig):
         if self.onnx_model_type == ModelTypeEnum.yolox:
             self.calculate_grids_strides()
 
+        self._warmup(detector_config)
         logger.info(f"ONNX: {path} loaded")
 
+    def _warmup(self, detector_config: ONNXDetectorConfig) -> None:
+        """Run a warmup inference to front-load one-time compilation costs.
+
+        Feeds detect_raw one all-zero frame shaped and typed per the model config.
+        Some GPU backends have a slow first inference: CUDA may JIT-compile PTX on
+        newer architectures (e.g. NVIDIA 50-series / Blackwell) and MIGraphX
+        compiles the model graph on first run. Running it during detector creation
+        keeps the watchdog start_time at 0.0 so the process won't be killed.
+        """
+        if detector_config.model.input_tensor == InputTensorEnum.nchw:
+            shape = (1, 3, detector_config.model.height, detector_config.model.width)
+        else:
+            shape = (1, detector_config.model.height, detector_config.model.width, 3)
+
+        if detector_config.model.input_dtype in (
+            InputDTypeEnum.float,
+            InputDTypeEnum.float_denorm,
+        ):
+            dtype = np.float32
+        else:
+            dtype = np.uint8
+
+        logger.info("ONNX: warming up detector (may take a while on first run)...")
+        self.detect_raw(np.zeros(shape, dtype=dtype))
+
     def detect_raw(self, tensor_input: np.ndarray):
         if self.onnx_model_type == ModelTypeEnum.dfine:
             tensor_output = self.runner.run(