From 4f4678589bc9b6eef7c58e4ea7204670301e8043 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 30 Jul 2025 14:05:36 +0200 Subject: [PATCH 1/9] migrate YT video file download script to use `yt_dlp`; old script didn't work anymore --- examples/time_in_zone/requirements.txt | 5 +-- .../scripts/download_from_youtube.py | 40 +++++++++++++++---- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/examples/time_in_zone/requirements.txt b/examples/time_in_zone/requirements.txt index b5ff1911b..0503fdca5 100644 --- a/examples/time_in_zone/requirements.txt +++ b/examples/time_in_zone/requirements.txt @@ -1,6 +1,5 @@ supervision ultralytics inference -# https://github.com/pytube/pytube/issues/2044 -# pytube -pytubefix +rfdetr +yt_dlp diff --git a/examples/time_in_zone/scripts/download_from_youtube.py b/examples/time_in_zone/scripts/download_from_youtube.py index b74009557..4ab2d4c6c 100644 --- a/examples/time_in_zone/scripts/download_from_youtube.py +++ b/examples/time_in_zone/scripts/download_from_youtube.py @@ -2,19 +2,45 @@ import argparse import os +import sys +from typing import Dict, Any -from pytubefix import YouTube +import yt_dlp +from yt_dlp.utils import DownloadError + + +def _build_ydl_opts(output_path: str | None, file_name: str | None) -> Dict[str, Any]: + out_dir = output_path or "." 
+ + if not os.path.exists(out_dir): + os.makedirs(out_dir) + + name_template = file_name if file_name else "%(title)s.%(ext)s" + + return { + "format": ( + "bestvideo[ext=mp4][vcodec!*=av01][height<=2160]+bestaudio[ext=m4a]/" + "best[ext=mp4][vcodec!*=av01][height<=2160]/" + "bestvideo+bestaudio/best" + ), + "merge_output_format": "mp4", + "outtmpl": os.path.join(out_dir, name_template), + "quiet": False, + "noplaylist": True, + } def main(url: str, output_path: str | None, file_name: str | None) -> None: - yt = YouTube(url) - stream = yt.streams.get_highest_resolution() + ydl_opts = _build_ydl_opts(output_path, file_name) - if not os.path.exists(output_path): - os.makedirs(output_path) + try: + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download([url]) + except DownloadError as err: + print(f"Download failed: {err}", file=sys.stderr) + sys.exit(1) - stream.download(output_path=output_path, filename=file_name) - final_name = file_name if file_name else yt.title + final_name = file_name if file_name else "the video title" final_path = output_path if output_path else "current directory" print(f"Download completed! 
Video saved as '{final_name}' in '{final_path}'.") From a891079bda42719695c9ac450151d75fc806c985 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 30 Jul 2025 14:11:49 +0200 Subject: [PATCH 2/9] RF-DETR file example works --- examples/time_in_zone/rfdetr_file_example.py | 222 +++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 examples/time_in_zone/rfdetr_file_example.py diff --git a/examples/time_in_zone/rfdetr_file_example.py b/examples/time_in_zone/rfdetr_file_example.py new file mode 100644 index 000000000..b7c4db55a --- /dev/null +++ b/examples/time_in_zone/rfdetr_file_example.py @@ -0,0 +1,222 @@ +from __future__ import annotations + +import argparse +from enum import Enum + +import cv2 +import numpy as np +from rfdetr import RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge + +import supervision as sv +from utils.general import find_in_list, load_zones_config +from utils.timers import FPSBasedTimer + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator(color=COLORS, text_color=sv.Color.from_hex("#000000")) + + +class ModelSize(Enum): + + NANO = "nano" + SMALL = "small" + MEDIUM = "medium" + BASE = "base" + LARGE = "large" + + @classmethod + def list(cls): + return list(map(lambda c: c.value, cls)) + + @classmethod + def from_value(cls, value: "ModelSize" | str) -> "ModelSize": + if isinstance(value, cls): + return value + if isinstance(value, str): + value = value.lower() + try: + return cls(value) + except ValueError: + raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}") + raise ValueError( + f"Invalid value type: {type(value)}. Must be an instance of " + f"{cls.__name__} or str." 
+ ) + + +def load_model(checkpoint: ModelSize | str, device: str, resolution: int): + checkpoint = ModelSize.from_value(checkpoint) + + if checkpoint == ModelSize.NANO: + return RFDETRNano(device=device, resolution=resolution) + if checkpoint == ModelSize.SMALL: + return RFDETRSmall(device=device, resolution=resolution) + if checkpoint == ModelSize.MEDIUM: + return RFDETRMedium(device=device, resolution=resolution) + if checkpoint == ModelSize.BASE: + return RFDETRBase(device=device, resolution=resolution) + if checkpoint == ModelSize.LARGE: + return RFDETRLarge(device=device, resolution=resolution) + + raise ValueError( + f"Invalid checkpoint: {checkpoint}. " + f"Must be one of: {ModelSize.list()}." + ) + + +def adjust_resolution(checkpoint: ModelSize | str, resolution: int) -> int: + checkpoint = ModelSize.from_value(checkpoint) + + if checkpoint in {ModelSize.NANO, ModelSize.SMALL, ModelSize.MEDIUM}: + divisor = 32 + elif checkpoint in {ModelSize.BASE, ModelSize.LARGE}: + divisor = 56 + else: + raise ValueError( + f"Unknown checkpoint: {checkpoint}. " + f"Must be one of: {ModelSize.list()}." 
+ ) + + remainder = resolution % divisor + if remainder == 0: + return resolution + lower = resolution - remainder + upper = lower + divisor + + if resolution - lower < upper - resolution: + return lower + else: + return upper + + +def main( + source_video_path: str, + zone_configuration_path: str, + model_size: str, + device: str, + confidence: float, + iou: float, + classes: list[int], + resolution: int, +) -> None: + resolution = adjust_resolution(checkpoint=model_size, resolution=resolution) + model = load_model(checkpoint=model_size, device=device, resolution=resolution) + tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + video_info = sv.VideoInfo.from_video_path(video_path=source_video_path) + frames_generator = sv.get_video_frames_generator(source_video_path) + + polygons = load_zones_config(file_path=zone_configuration_path) + zones = [ + sv.PolygonZone( + polygon=polygon, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in polygons + ] + timers = [FPSBasedTimer(video_info.fps) for _ in zones] + + for frame in frames_generator: + detections = model.predict(frame, threshold=confidence) + detections = detections[find_in_list(detections.class_id, classes)] + detections = detections.with_nms(threshold=iou) + detections = tracker.update_with_detections(detections) + + annotated_frame = frame.copy() + + for idx, zone in enumerate(zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(time // 60):02d}:{int(time % 60):02d}" + for tracker_id, time in zip(detections_in_zone.tracker_id, time_in_zone) + ] + 
annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + + cv2.imshow("Processed Video", annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + cv2.destroyAllWindows() + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using video file." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--source_video_path", + type=str, + required=True, + help="Path to the source video file.", + ) + parser.add_argument( + "--model_size", + type=str, + default="small", + help="Size of RF-DETR model ('nano', 'small', 'medium', 'base' or 'large'). " + "Default is 'small'." + ) + parser.add_argument( + "--device", + type=str, + default="cpu", + help="Computation device ('cpu', 'mps' or 'cuda'). Default is 'cpu'.", + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 to 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + default=0.7, + type=float, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. 
If empty, all classes are tracked.", + ) + parser.add_argument( + "--resolution", + default=640, + type=int, + required=True, + help="Resolution for the model input.", + ) + args = parser.parse_args() + + main( + source_video_path=args.source_video_path, + zone_configuration_path=args.zone_configuration_path, + model_size=args.model_size, + device=args.device, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + resolution=args.resolution, + ) \ No newline at end of file From d5ccafa0b16cc985c1e22999d8174b3150bcb270 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 30 Jul 2025 14:15:11 +0200 Subject: [PATCH 3/9] ultralytics stream example migrated to new inference --- examples/time_in_zone/ultralytics_stream_example.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/examples/time_in_zone/ultralytics_stream_example.py b/examples/time_in_zone/ultralytics_stream_example.py index ca5000a4d..e4bc1e893 100644 --- a/examples/time_in_zone/ultralytics_stream_example.py +++ b/examples/time_in_zone/ultralytics_stream_example.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import argparse import cv2 @@ -87,9 +89,9 @@ def main( ) -> None: model = YOLO(weights) - def inference_callback(frame: VideoFrame) -> sv.Detections: - results = model(frame.image, verbose=False, conf=confidence, device=device)[0] - return sv.Detections.from_ultralytics(results).with_nms(threshold=iou) + def inference_callback(frames: list[VideoFrame]) -> list[sv.Detections]: + results = model(frames[0].image, verbose=False, conf=confidence, device=device)[0] + return [sv.Detections.from_ultralytics(results).with_nms(threshold=iou)] sink = CustomSink(zone_configuration_path=zone_configuration_path, classes=classes) @@ -126,8 +128,8 @@ def inference_callback(frame: VideoFrame) -> sv.Detections: parser.add_argument( "--weights", type=str, - default="yolov8s.pt", - help="Path to the model weights file. 
Default is 'yolov8s.pt'.", + default="yolov8x.pt", + help="Path to the model weights file. Default is 'yolov8x.pt'.", ) parser.add_argument( "--device", From ac9dcc7f558ad75ba8b912e5d26800e6785006ae Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 30 Jul 2025 14:20:33 +0200 Subject: [PATCH 4/9] migrate inference scripts to use RF-DETR-Medium --- examples/time_in_zone/inference_file_example.py | 2 +- examples/time_in_zone/inference_naive_stream_example.py | 2 +- examples/time_in_zone/inference_stream_example.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/time_in_zone/inference_file_example.py b/examples/time_in_zone/inference_file_example.py index 6dcaf353b..ad6a884a2 100644 --- a/examples/time_in_zone/inference_file_example.py +++ b/examples/time_in_zone/inference_file_example.py @@ -94,7 +94,7 @@ def main( help="Path to the source video file.", ) parser.add_argument( - "--model_id", type=str, default="yolov8s-640", help="Roboflow model ID." + "--model_id", type=str, default="rfdetr-medium", help="Roboflow model ID." ) parser.add_argument( "--confidence_threshold", diff --git a/examples/time_in_zone/inference_naive_stream_example.py b/examples/time_in_zone/inference_naive_stream_example.py index 98f60d9d0..fa61d748f 100644 --- a/examples/time_in_zone/inference_naive_stream_example.py +++ b/examples/time_in_zone/inference_naive_stream_example.py @@ -104,7 +104,7 @@ def main( help="Complete RTSP URL for the video stream.", ) parser.add_argument( - "--model_id", type=str, default="yolov8s-640", help="Roboflow model ID." + "--model_id", type=str, default="rfdetr-medium", help="Roboflow model ID." 
) parser.add_argument( "--confidence_threshold", diff --git a/examples/time_in_zone/inference_stream_example.py b/examples/time_in_zone/inference_stream_example.py index 91056d137..715ec4b14 100644 --- a/examples/time_in_zone/inference_stream_example.py +++ b/examples/time_in_zone/inference_stream_example.py @@ -119,7 +119,7 @@ def main( help="Complete RTSP URL for the video stream.", ) parser.add_argument( - "--model_id", type=str, default="yolov8s-640", help="Roboflow model ID." + "--model_id", type=str, default="rfdetr-medium", help="Roboflow model ID." ) parser.add_argument( "--confidence_threshold", From 14065723798ecf54bbb07041e3e92d2e9ed8d52c Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 30 Jul 2025 14:36:51 +0200 Subject: [PATCH 5/9] RF-DETR naive stream example --- .../rfdetr_naive_stream_example.py | 232 ++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 examples/time_in_zone/rfdetr_naive_stream_example.py diff --git a/examples/time_in_zone/rfdetr_naive_stream_example.py b/examples/time_in_zone/rfdetr_naive_stream_example.py new file mode 100644 index 000000000..3548cce2d --- /dev/null +++ b/examples/time_in_zone/rfdetr_naive_stream_example.py @@ -0,0 +1,232 @@ +from __future__ import annotations + +import argparse +from enum import Enum + +import cv2 +import numpy as np +from rfdetr import RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge + +import supervision as sv +from utils.general import find_in_list, get_stream_frames_generator, load_zones_config +from utils.timers import ClockBasedTimer + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator(color=COLORS, text_color=sv.Color.from_hex("#000000")) + + +class ModelSize(Enum): + + NANO = "nano" + SMALL = "small" + MEDIUM = "medium" + BASE = "base" + LARGE = "large" + + @classmethod + def list(cls): + return list(map(lambda c: c.value, cls)) + + 
@classmethod + def from_value(cls, value: "ModelSize" | str) -> "ModelSize": + if isinstance(value, cls): + return value + if isinstance(value, str): + value = value.lower() + try: + return cls(value) + except ValueError: + raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}") + raise ValueError( + f"Invalid value type: {type(value)}. Must be an instance of " + f"{cls.__name__} or str." + ) + + +def load_model(checkpoint: ModelSize | str, device: str, resolution: int): + checkpoint = ModelSize.from_value(checkpoint) + + if checkpoint == ModelSize.NANO: + return RFDETRNano(device=device, resolution=resolution) + if checkpoint == ModelSize.SMALL: + return RFDETRSmall(device=device, resolution=resolution) + if checkpoint == ModelSize.MEDIUM: + return RFDETRMedium(device=device, resolution=resolution) + if checkpoint == ModelSize.BASE: + return RFDETRBase(device=device, resolution=resolution) + if checkpoint == ModelSize.LARGE: + return RFDETRLarge(device=device, resolution=resolution) + + raise ValueError( + f"Invalid checkpoint: {checkpoint}. " + f"Must be one of: {ModelSize.list()}." + ) + + +def adjust_resolution(checkpoint: ModelSize | str, resolution: int) -> int: + checkpoint = ModelSize.from_value(checkpoint) + + if checkpoint in {ModelSize.NANO, ModelSize.SMALL, ModelSize.MEDIUM}: + divisor = 32 + elif checkpoint in {ModelSize.BASE, ModelSize.LARGE}: + divisor = 56 + else: + raise ValueError( + f"Unknown checkpoint: {checkpoint}. " + f"Must be one of: {ModelSize.list()}." 
+ ) + + remainder = resolution % divisor + if remainder == 0: + return resolution + lower = resolution - remainder + upper = lower + divisor + + if resolution - lower < upper - resolution: + return lower + else: + return upper + + +def main( + rtsp_url: str, + zone_configuration_path: str, + model_size: str, + device: str, + confidence: float, + iou: float, + classes: list[int], + resolution: int, +) -> None: + resolution = adjust_resolution(checkpoint=model_size, resolution=resolution) + model = load_model(checkpoint=model_size, device=device, resolution=resolution) + tracker = sv.ByteTrack(minimum_matching_threshold=0.5) + frames_generator = get_stream_frames_generator(rtsp_url=rtsp_url) + fps_monitor = sv.FPSMonitor() + + polygons = load_zones_config(file_path=zone_configuration_path) + zones = [ + sv.PolygonZone( + polygon=polygon, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in polygons + ] + timers = [ClockBasedTimer() for _ in zones] + + for frame in frames_generator: + fps_monitor.tick() + fps = fps_monitor.fps + + detections = model.predict(frame, threshold=confidence) + detections = detections[find_in_list(detections.class_id, classes)] + detections = detections.with_nms(threshold=iou) + detections = tracker.update_with_detections(detections) + + annotated_frame = frame.copy() + annotated_frame = sv.draw_text( + scene=annotated_frame, + text=f"{fps:.1f}", + text_anchor=sv.Point(40, 30), + background_color=sv.Color.from_hex("#A351FB"), + text_color=sv.Color.from_hex("#000000"), + ) + + for idx, zone in enumerate(zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, polygon=zone.polygon, color=COLORS.by_idx(idx) + ) + + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + 
custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(t // 60):02d}:{int(t % 60):02d}" + for tracker_id, t in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + + cv2.imshow("Processed Video", annotated_frame) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + + cv2.destroyAllWindows() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones, using RTSP stream." + ) + parser.add_argument( + "--zone_configuration_path", + type=str, + required=True, + help="Path to the zone configuration JSON file.", + ) + parser.add_argument( + "--rtsp_url", + type=str, + required=True, + help="Complete RTSP URL for the video stream.", + ) + parser.add_argument( + "--model_size", + type=str, + default="small", + help="Size of RF-DETR model ('nano', 'small', 'medium', 'base', 'large')." + ) + parser.add_argument( + "--device", + type=str, + default="cpu", + help="Computation device ('cpu', 'mps' or 'cuda'). Default is 'cpu'.", + ) + parser.add_argument( + "--confidence_threshold", + type=float, + default=0.3, + help="Confidence level for detections (0 → 1). Default is 0.3.", + ) + parser.add_argument( + "--iou_threshold", + type=float, + default=0.7, + help="IOU threshold for non-max suppression. Default is 0.7.", + ) + parser.add_argument( + "--classes", + nargs="*", + type=int, + default=[], + help="List of class IDs to track. 
Empty list → all classes.", + ) + parser.add_argument( + "--resolution", + required=True, + type=int, + help="Input resolution for the model (will be rounded to a legal multiple).", + ) + args = parser.parse_args() + + main( + rtsp_url=args.rtsp_url, + zone_configuration_path=args.zone_configuration_path, + model_size=args.model_size, + device=args.device, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + resolution=args.resolution, + ) From aaf7202cddbd26fd001a0ba04b365a0fed98f213 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 30 Jul 2025 14:48:38 +0200 Subject: [PATCH 6/9] RF-DETR stream example --- .../time_in_zone/rfdetr_stream_example.py | 184 ++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 examples/time_in_zone/rfdetr_stream_example.py diff --git a/examples/time_in_zone/rfdetr_stream_example.py b/examples/time_in_zone/rfdetr_stream_example.py new file mode 100644 index 000000000..2b48c6da3 --- /dev/null +++ b/examples/time_in_zone/rfdetr_stream_example.py @@ -0,0 +1,184 @@ +from __future__ import annotations + +import argparse +from enum import Enum + +import cv2 +import numpy as np +from inference import InferencePipeline +from inference.core.interfaces.camera.entities import VideoFrame +from rfdetr import RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge +import supervision as sv + +from utils.general import find_in_list, load_zones_config +from utils.timers import ClockBasedTimer + + +class ModelSize(Enum): + NANO = "nano" + SMALL = "small" + MEDIUM = "medium" + BASE = "base" + LARGE = "large" + + @classmethod + def list(cls): + return [c.value for c in cls] + + @classmethod + def from_value(cls, value: "ModelSize" | str) -> "ModelSize": + if isinstance(value, cls): + return value + if isinstance(value, str): + value = value.lower() + try: + return cls(value) + except ValueError as exc: + raise ValueError( + f"Invalid model size '{value}'. Must be one of {cls.list()}." 
+ ) from exc + raise ValueError( + f"Invalid value type '{type(value)}'. Expected str or ModelSize." + ) + + +def load_model(checkpoint: ModelSize | str, device: str, resolution: int): + checkpoint = ModelSize.from_value(checkpoint) + if checkpoint == ModelSize.NANO: + return RFDETRNano(device=device, resolution=resolution) + if checkpoint == ModelSize.SMALL: + return RFDETRSmall(device=device, resolution=resolution) + if checkpoint == ModelSize.MEDIUM: + return RFDETRMedium(device=device, resolution=resolution) + if checkpoint == ModelSize.BASE: + return RFDETRBase(device=device, resolution=resolution) + if checkpoint == ModelSize.LARGE: + return RFDETRLarge(device=device, resolution=resolution) + raise RuntimeError("Unhandled checkpoint type.") + + +def adjust_resolution(checkpoint: ModelSize | str, resolution: int) -> int: + checkpoint = ModelSize.from_value(checkpoint) + divisor = 32 if checkpoint in {ModelSize.NANO, ModelSize.SMALL, ModelSize.MEDIUM} else 56 + remainder = resolution % divisor + if remainder == 0: + return resolution + lower = resolution - remainder + upper = lower + divisor + return lower if resolution - lower < upper - resolution else upper + + +COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) +COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) +LABEL_ANNOTATOR = sv.LabelAnnotator(color=COLORS, text_color=sv.Color.from_hex("#000000")) + + +class CustomSink: + def __init__(self, zone_configuration_path: str, classes: list[int]): + self.classes = classes + self.tracker = sv.ByteTrack(minimum_matching_threshold=0.8) + self.fps_monitor = sv.FPSMonitor() + self.polygons = load_zones_config(file_path=zone_configuration_path) + self.timers = [ClockBasedTimer() for _ in self.polygons] + self.zones = [ + sv.PolygonZone( + polygon=polygon, + triggering_anchors=(sv.Position.CENTER,), + ) + for polygon in self.polygons + ] + + def on_prediction(self, detections: sv.Detections, frame: VideoFrame) -> None: + 
self.fps_monitor.tick() + fps = self.fps_monitor.fps + detections = detections[find_in_list(detections.class_id, self.classes)] + detections = self.tracker.update_with_detections(detections) + annotated_frame = frame.image.copy() + annotated_frame = sv.draw_text( + scene=annotated_frame, + text=f"{fps:.1f}", + text_anchor=sv.Point(40, 30), + background_color=sv.Color.from_hex("#A351FB"), + text_color=sv.Color.from_hex("#000000"), + ) + for idx, zone in enumerate(self.zones): + annotated_frame = sv.draw_polygon( + scene=annotated_frame, + polygon=zone.polygon, + color=COLORS.by_idx(idx), + ) + detections_in_zone = detections[zone.trigger(detections)] + time_in_zone = self.timers[idx].tick(detections_in_zone) + custom_color_lookup = np.full(detections_in_zone.class_id.shape, idx) + annotated_frame = COLOR_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + custom_color_lookup=custom_color_lookup, + ) + labels = [ + f"#{tracker_id} {int(t // 60):02d}:{int(t % 60):02d}" + for tracker_id, t in zip(detections_in_zone.tracker_id, time_in_zone) + ] + annotated_frame = LABEL_ANNOTATOR.annotate( + scene=annotated_frame, + detections=detections_in_zone, + labels=labels, + custom_color_lookup=custom_color_lookup, + ) + cv2.imshow("Processed Video", annotated_frame) + cv2.waitKey(1) + + +def main( + rtsp_url: str, + zone_configuration_path: str, + model_size: str, + device: str, + confidence: float, + iou: float, + classes: list[int], + resolution: int, +) -> None: + resolution = adjust_resolution(checkpoint=model_size, resolution=resolution) + model = load_model(checkpoint=model_size, device=device, resolution=resolution) + + def inference_callback(frames: list[VideoFrame]) -> list[sv.Detections]: + dets = model.predict(frames[0].image, threshold=confidence) + return [dets.with_nms(threshold=iou)] + + sink = CustomSink(zone_configuration_path=zone_configuration_path, classes=classes) + pipeline = InferencePipeline.init_with_custom_logic( + 
video_reference=rtsp_url, + on_video_frame=inference_callback, + on_prediction=sink.on_prediction, + ) + pipeline.start() + try: + pipeline.join() + except KeyboardInterrupt: + pipeline.terminate() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Calculating detections dwell time in zones using an RTSP stream." + ) + parser.add_argument("--zone_configuration_path", required=True, type=str) + parser.add_argument("--rtsp_url", required=True, type=str) + parser.add_argument("--model_size", default="small", type=str) + parser.add_argument("--device", default="cpu", type=str) + parser.add_argument("--confidence_threshold", default=0.3, type=float) + parser.add_argument("--iou_threshold", default=0.7, type=float) + parser.add_argument("--classes", nargs="*", default=[], type=int) + parser.add_argument("--resolution", required=True, type=int) + args = parser.parse_args() + main( + rtsp_url=args.rtsp_url, + zone_configuration_path=args.zone_configuration_path, + model_size=args.model_size, + device=args.device, + confidence=args.confidence_threshold, + iou=args.iou_threshold, + classes=args.classes, + resolution=args.resolution, + ) From 85aa1b85f3c8c053a79bf8a543197e44968f3918 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 30 Jul 2025 18:51:27 +0200 Subject: [PATCH 7/9] updated README.md --- examples/time_in_zone/README.md | 114 +++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 32 deletions(-) diff --git a/examples/time_in_zone/README.md b/examples/time_in_zone/README.md index 5c91a80b3..f55c5d0c7 100644 --- a/examples/time_in_zone/README.md +++ b/examples/time_in_zone/README.md @@ -86,15 +86,10 @@ window where you can draw polygons on the source image or video file. The polygo be saved as a JSON file. - `--source_path`: Path to the source image or video file for drawing polygons. - - `--zone_configuration_path`: Path where the polygon annotations will be saved as a JSON file. 
- - `enter` - finish drawing the current polygon. - - `escape` - cancel drawing the current polygon. - - `q` - quit the drawing window. - - `s` - save zone configuration to a JSON file. ```bash @@ -128,8 +123,8 @@ Script to run object detection on a video file using the Roboflow Inference mode python inference_file_example.py \ --zone_configuration_path "data/checkout/config.json" \ --source_video_path "data/checkout/video.mp4" \ - --model_id "yolov8x-640" \ - --classes 0 \ + --model_id "rfdetr-medium" \ + --classes 1 \ --confidence_threshold 0.3 \ --iou_threshold 0.7 ``` @@ -140,8 +135,8 @@ https://github.com/roboflow/supervision/assets/26109316/d051cc8a-dd15-41d4-aa36- python inference_file_example.py \ --zone_configuration_path "data/traffic/config.json" \ --source_video_path "data/traffic/video.mp4" \ - --model_id "yolov8x-640" \ - --classes 2 5 6 7 \ + --model_id "rfdetr-medium" \ + --classes 3 6 7 8 \ --confidence_threshold 0.3 \ --iou_threshold 0.7 ``` @@ -150,7 +145,7 @@ https://github.com/roboflow/supervision/assets/26109316/5ec896d7-4b39-4426-8979- ### `inference_stream_example` -Script to run object detection on a video stream using the Roboflow Inference model. +Script to run object detection on an RTSP stream using Roboflow Inference model. - `--zone_configuration_path`: Path to the zone configuration JSON file. - `--rtsp_url`: Complete RTSP URL for the video stream. 
@@ -163,8 +158,8 @@ Script to run object detection on a video stream using the Roboflow Inference mo python inference_stream_example.py \ --zone_configuration_path "data/checkout/config.json" \ --rtsp_url "rtsp://localhost:8554/live0.stream" \ - --model_id "yolov8x-640" \ - --classes 0 \ + --model_id "rfdetr-medium" \ + --classes 1 \ --confidence_threshold 0.3 \ --iou_threshold 0.7 ``` @@ -173,14 +168,85 @@ python inference_stream_example.py \ python inference_stream_example.py \ --zone_configuration_path "data/traffic/config.json" \ --rtsp_url "rtsp://localhost:8554/live0.stream" \ - --model_id "yolov8x-640" \ - --classes 2 5 6 7 \ + --model_id "rfdetr-medium" \ + --classes 3 6 7 8 \ --confidence_threshold 0.3 \ --iou_threshold 0.7 ``` -
-👉 show ultralytics examples +### `rfdetr_file_example` + +Script to run object detection on a video file using the RF-DETR model. + +- `--zone_configuration_path`: Path to the zone configuration JSON file. +- `--source_video_path`: Path to the source video file. +- `--model_size`: Size of RF-DETR model ('nano', 'small', 'medium', 'base' or 'large'). Default is 'small'. +- `--device`: Computation device ('cpu', 'mps' or 'cuda'). Default is 'cpu'. +- `--classes`: List of class IDs to track. If empty, all classes are tracked. +- `--confidence_threshold`: Confidence level for detections (`0` to `1`). Default is `0.3`. +- `--iou_threshold`: IOU threshold for non-max suppression. Default is `0.7`. +- `--resolution`: Resolution for the model input. Default is `640`. + +```bash +python rfdetr_file_example.py \ + --zone_configuration_path "data/checkout/config.json" \ + --source_video_path "data/checkout/video.mp4" \ + --model_size "medium" \ + --device="cpu" \ + --classes 1 \ + --confidence_threshold 0.3 \ + --iou_threshold 0.7 \ + --resolution 640 +``` + +```bash +python rfdetr_file_example.py \ + --zone_configuration_path "data/traffic/config.json" \ + --source_video_path "data/traffic/video.mp4" \ + --model_size "medium" \ + --device="cpu" \ + --classes 3 6 7 8 \ + --confidence_threshold 0.3 \ + --iou_threshold 0.7 \ + --resolution 640 +``` + +### `rfdetr_stream_example` + +Script to run object detection on an RTSP stream using the RF-DETR model. + +- `--zone_configuration_path`: Path to the zone-configuration JSON file defining the polygons. +- `--rtsp_url`: Complete RTSP URL of the live video stream. +- `--model_size`: RF-DETR backbone size to load — choose from 'nano', 'small', 'medium', 'base', or 'large' (default 'small'). +- `--device`: Compute device to run the model on ('cpu', 'mps', or 'cuda'; default 'cpu'). +- `--classes`: Space-separated list of class IDs to track. Leave empty to track all classes. 
+- `--confidence_threshold`: Minimum confidence score for a detection to be kept, range 0–1 (default 0.3). +- `--iou_threshold`: IOU threshold applied during non-max suppression (default 0.7). +- `--resolution`: Shortest-side input resolution supplied to the model. The script will round it to the nearest valid multiple (required; e.g. 640). + +```bash +python rfdetr_stream_example.py \ + --zone_configuration_path "data/checkout/config.json" \ + --rtsp_url "rtsp://localhost:8554/live0.stream" \ + --model_size "medium" \ + --device "cpu" \ + --classes 1 \ + --confidence_threshold 0.3 \ + --iou_threshold 0.7 \ + --resolution 640 +``` + +```bash +python rfdetr_stream_example.py \ + --zone_configuration_path "data/traffic/config.json" \ + --rtsp_url "rtsp://localhost:8554/live0.stream" \ + --model_size "medium" \ + --device "cpu" \ + --classes 3 6 7 8 \ + --confidence_threshold 0.3 \ + --iou_threshold 0.7 \ + --resolution 640 +``` ### `ultralytics_file_example` @@ -249,19 +315,3 @@ python ultralytics_stream_example.py \ --confidence_threshold 0.3 \ --iou_threshold 0.7 ``` - -
- -## © license - -This demo integrates two main components, each with its own licensing: - -- ultralytics: The object detection model used in this demo, YOLOv8, is distributed - under the [AGPL-3.0 license](https://github.com/ultralytics/ultralytics/blob/main/LICENSE). - You can find more details about this license here. - -- supervision: The analytics code that powers the zone-based analysis in this demo is - based on the Supervision library, which is licensed under the - [MIT license](https://github.com/roboflow/supervision/blob/develop/LICENSE.md). This - makes the Supervision part of the code fully open source and freely usable in your - projects. From 459acee45e0fd9acba8dfb114fc76878125a47bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 30 Jul 2025 16:52:35 +0000 Subject: [PATCH 8/9] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto=20?= =?UTF-8?q?format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/time_in_zone/rfdetr_file_example.py | 24 +++++++++---------- .../rfdetr_naive_stream_example.py | 21 ++++++++-------- .../time_in_zone/rfdetr_stream_example.py | 16 ++++++++----- .../scripts/download_from_youtube.py | 4 ++-- .../ultralytics_stream_example.py | 4 +++- 5 files changed, 37 insertions(+), 32 deletions(-) diff --git a/examples/time_in_zone/rfdetr_file_example.py b/examples/time_in_zone/rfdetr_file_example.py index b7c4db55a..b89179524 100644 --- a/examples/time_in_zone/rfdetr_file_example.py +++ b/examples/time_in_zone/rfdetr_file_example.py @@ -5,19 +5,20 @@ import cv2 import numpy as np -from rfdetr import RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge - -import supervision as sv +from rfdetr import RFDETRBase, RFDETRLarge, RFDETRMedium, RFDETRNano, RFDETRSmall from utils.general import find_in_list, load_zones_config from utils.timers import FPSBasedTimer +import supervision as sv + 
COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) -LABEL_ANNOTATOR = sv.LabelAnnotator(color=COLORS, text_color=sv.Color.from_hex("#000000")) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) class ModelSize(Enum): - NANO = "nano" SMALL = "small" MEDIUM = "medium" @@ -29,7 +30,7 @@ def list(cls): return list(map(lambda c: c.value, cls)) @classmethod - def from_value(cls, value: "ModelSize" | str) -> "ModelSize": + def from_value(cls, value: ModelSize | str) -> ModelSize: if isinstance(value, cls): return value if isinstance(value, str): @@ -59,8 +60,7 @@ def load_model(checkpoint: ModelSize | str, device: str, resolution: int): return RFDETRLarge(device=device, resolution=resolution) raise ValueError( - f"Invalid checkpoint: {checkpoint}. " - f"Must be one of: {ModelSize.list()}." + f"Invalid checkpoint: {checkpoint}. Must be one of: {ModelSize.list()}." ) @@ -73,8 +73,7 @@ def adjust_resolution(checkpoint: ModelSize | str, resolution: int) -> int: divisor = 56 else: raise ValueError( - f"Unknown checkpoint: {checkpoint}. " - f"Must be one of: {ModelSize.list()}." + f"Unknown checkpoint: {checkpoint}. Must be one of: {ModelSize.list()}." ) remainder = resolution % divisor @@ -153,6 +152,7 @@ def main( break cv2.destroyAllWindows() + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Calculating detections dwell time in zones, using video file." @@ -174,7 +174,7 @@ def main( type=str, default="small", help="Size of RF-DETR model ('nano', 'small', 'medium', 'base' or 'large'). " - "Default is 'small'." 
+ "Default is 'small'.", ) parser.add_argument( "--device", @@ -219,4 +219,4 @@ def main( iou=args.iou_threshold, classes=args.classes, resolution=args.resolution, - ) \ No newline at end of file + ) diff --git a/examples/time_in_zone/rfdetr_naive_stream_example.py b/examples/time_in_zone/rfdetr_naive_stream_example.py index 3548cce2d..3a09bd9a8 100644 --- a/examples/time_in_zone/rfdetr_naive_stream_example.py +++ b/examples/time_in_zone/rfdetr_naive_stream_example.py @@ -5,19 +5,20 @@ import cv2 import numpy as np -from rfdetr import RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge - -import supervision as sv +from rfdetr import RFDETRBase, RFDETRLarge, RFDETRMedium, RFDETRNano, RFDETRSmall from utils.general import find_in_list, get_stream_frames_generator, load_zones_config from utils.timers import ClockBasedTimer +import supervision as sv + COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) -LABEL_ANNOTATOR = sv.LabelAnnotator(color=COLORS, text_color=sv.Color.from_hex("#000000")) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) class ModelSize(Enum): - NANO = "nano" SMALL = "small" MEDIUM = "medium" @@ -29,7 +30,7 @@ def list(cls): return list(map(lambda c: c.value, cls)) @classmethod - def from_value(cls, value: "ModelSize" | str) -> "ModelSize": + def from_value(cls, value: ModelSize | str) -> ModelSize: if isinstance(value, cls): return value if isinstance(value, str): @@ -59,8 +60,7 @@ def load_model(checkpoint: ModelSize | str, device: str, resolution: int): return RFDETRLarge(device=device, resolution=resolution) raise ValueError( - f"Invalid checkpoint: {checkpoint}. " - f"Must be one of: {ModelSize.list()}." + f"Invalid checkpoint: {checkpoint}. Must be one of: {ModelSize.list()}." 
) @@ -73,8 +73,7 @@ def adjust_resolution(checkpoint: ModelSize | str, resolution: int) -> int: divisor = 56 else: raise ValueError( - f"Unknown checkpoint: {checkpoint}. " - f"Must be one of: {ModelSize.list()}." + f"Unknown checkpoint: {checkpoint}. Must be one of: {ModelSize.list()}." ) remainder = resolution % divisor @@ -185,7 +184,7 @@ def main( "--model_size", type=str, default="small", - help="Size of RF-DETR model ('nano', 'small', 'medium', 'base', 'large')." + help="Size of RF-DETR model ('nano', 'small', 'medium', 'base', 'large').", ) parser.add_argument( "--device", diff --git a/examples/time_in_zone/rfdetr_stream_example.py b/examples/time_in_zone/rfdetr_stream_example.py index 2b48c6da3..6e5d6f995 100644 --- a/examples/time_in_zone/rfdetr_stream_example.py +++ b/examples/time_in_zone/rfdetr_stream_example.py @@ -7,12 +7,12 @@ import numpy as np from inference import InferencePipeline from inference.core.interfaces.camera.entities import VideoFrame -from rfdetr import RFDETRNano, RFDETRSmall, RFDETRMedium, RFDETRBase, RFDETRLarge -import supervision as sv - +from rfdetr import RFDETRBase, RFDETRLarge, RFDETRMedium, RFDETRNano, RFDETRSmall from utils.general import find_in_list, load_zones_config from utils.timers import ClockBasedTimer +import supervision as sv + class ModelSize(Enum): NANO = "nano" @@ -26,7 +26,7 @@ def list(cls): return [c.value for c in cls] @classmethod - def from_value(cls, value: "ModelSize" | str) -> "ModelSize": + def from_value(cls, value: ModelSize | str) -> ModelSize: if isinstance(value, cls): return value if isinstance(value, str): @@ -59,7 +59,9 @@ def load_model(checkpoint: ModelSize | str, device: str, resolution: int): def adjust_resolution(checkpoint: ModelSize | str, resolution: int) -> int: checkpoint = ModelSize.from_value(checkpoint) - divisor = 32 if checkpoint in {ModelSize.NANO, ModelSize.SMALL, ModelSize.MEDIUM} else 56 + divisor = ( + 32 if checkpoint in {ModelSize.NANO, ModelSize.SMALL, ModelSize.MEDIUM} 
else 56 + ) remainder = resolution % divisor if remainder == 0: return resolution @@ -70,7 +72,9 @@ def adjust_resolution(checkpoint: ModelSize | str, resolution: int) -> int: COLORS = sv.ColorPalette.from_hex(["#E6194B", "#3CB44B", "#FFE119", "#3C76D1"]) COLOR_ANNOTATOR = sv.ColorAnnotator(color=COLORS) -LABEL_ANNOTATOR = sv.LabelAnnotator(color=COLORS, text_color=sv.Color.from_hex("#000000")) +LABEL_ANNOTATOR = sv.LabelAnnotator( + color=COLORS, text_color=sv.Color.from_hex("#000000") +) class CustomSink: diff --git a/examples/time_in_zone/scripts/download_from_youtube.py b/examples/time_in_zone/scripts/download_from_youtube.py index 4ab2d4c6c..92db92ae5 100644 --- a/examples/time_in_zone/scripts/download_from_youtube.py +++ b/examples/time_in_zone/scripts/download_from_youtube.py @@ -3,13 +3,13 @@ import argparse import os import sys -from typing import Dict, Any +from typing import Any, Dict import yt_dlp from yt_dlp.utils import DownloadError -def _build_ydl_opts(output_path: str | None, file_name: str | None) -> Dict[str, Any]: +def _build_ydl_opts(output_path: str | None, file_name: str | None) -> dict[str, Any]: out_dir = output_path or "." 
if not os.path.exists(out_dir): diff --git a/examples/time_in_zone/ultralytics_stream_example.py b/examples/time_in_zone/ultralytics_stream_example.py index e4bc1e893..2152e64ce 100644 --- a/examples/time_in_zone/ultralytics_stream_example.py +++ b/examples/time_in_zone/ultralytics_stream_example.py @@ -90,7 +90,9 @@ def main( model = YOLO(weights) def inference_callback(frames: list[VideoFrame]) -> list[sv.Detections]: - results = model(frames[0].image, verbose=False, conf=confidence, device=device)[0] + results = model(frames[0].image, verbose=False, conf=confidence, device=device)[ + 0 + ] return [sv.Detections.from_ultralytics(results).with_nms(threshold=iou)] sink = CustomSink(zone_configuration_path=zone_configuration_path, classes=classes) From 29e0501dab3999b57b685f3594e36f6daef771d4 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 30 Jul 2025 19:01:39 +0200 Subject: [PATCH 9/9] try to make ruff happy --- examples/time_in_zone/scripts/download_from_youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/time_in_zone/scripts/download_from_youtube.py b/examples/time_in_zone/scripts/download_from_youtube.py index 92db92ae5..e7efe8a6d 100644 --- a/examples/time_in_zone/scripts/download_from_youtube.py +++ b/examples/time_in_zone/scripts/download_from_youtube.py @@ -3,7 +3,7 @@ import argparse import os import sys -from typing import Any, Dict +from typing import Any import yt_dlp from yt_dlp.utils import DownloadError