|
8 | 8 | from frigate.detectors.detection_runners import get_optimized_runner |
9 | 9 | from frigate.detectors.detector_config import ( |
10 | 10 | BaseDetectorConfig, |
| 11 | + InputDTypeEnum, |
| 12 | + InputTensorEnum, |
11 | 13 | ModelTypeEnum, |
12 | 14 | ) |
13 | 15 | from frigate.util.model import ( |
@@ -59,8 +61,34 @@ def __init__(self, detector_config: ONNXDetectorConfig): |
59 | 61 | if self.onnx_model_type == ModelTypeEnum.yolox: |
60 | 62 | self.calculate_grids_strides() |
61 | 63 |
|
| 64 | + self._warmup(detector_config) |
62 | 65 | logger.info(f"ONNX: {path} loaded") |
63 | 66 |
|
def _warmup(self, detector_config: ONNXDetectorConfig) -> None:
    """Prime the detector with one throwaway inference during construction.

    The first inference can be far slower than later ones on some GPU
    backends: CUDA may have to JIT-compile PTX on newer architectures
    (e.g. NVIDIA 50-series / Blackwell), and MIGraphX compiles the model
    graph on its first run. Paying that cost here, while the detector is
    still being created, keeps the watchdog start_time at 0.0 so the
    process won't be killed.

    The dummy input is an all-zero, batch-of-one, three-channel tensor sized
    from ``detector_config.model``. ``nchw`` models get a channels-first
    shape; every other layout is treated as ``(1, height, width, 3)``.
    ``float`` and ``float_denorm`` models get ``float32`` data; every other
    input dtype gets ``uint8``. The result of the inference is discarded.
    """
    model_cfg = detector_config.model

    # Pick the dummy tensor layout from the configured input tensor format.
    channels_first = model_cfg.input_tensor == InputTensorEnum.nchw
    rows, cols = model_cfg.height, model_cfg.width
    dummy_shape = (1, 3, rows, cols) if channels_first else (1, rows, cols, 3)

    # Both float variants are fed as float32; anything else falls back to uint8.
    float_dtypes = (InputDTypeEnum.float, InputDTypeEnum.float_denorm)
    dummy_dtype = np.float32 if model_cfg.input_dtype in float_dtypes else np.uint8

    logger.info("ONNX: warming up detector (may take a while on first run)...")
    dummy_input = np.zeros(dummy_shape, dtype=dummy_dtype)
    self.detect_raw(dummy_input)
| 91 | + |
64 | 92 | def detect_raw(self, tensor_input: np.ndarray): |
65 | 93 | if self.onnx_model_type == ModelTypeEnum.dfine: |
66 | 94 | tensor_output = self.runner.run( |
|
0 commit comments