Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 36 additions & 55 deletions examples/speed_estimation/inference_example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import argparse
import os
from collections import defaultdict, deque

Expand Down Expand Up @@ -38,64 +37,39 @@ def transform_points(self, points: np.ndarray) -> np.ndarray:
return transformed_points.reshape(-1, 2)


def parse_arguments() -> argparse.Namespace:
    """Parse the command-line options of the Inference speed-estimation example.

    Returns:
        Namespace carrying ``model_id``, ``roboflow_api_key``,
        ``source_video_path``, ``target_video_path``,
        ``confidence_threshold`` and ``iou_threshold``.
    """
    cli = argparse.ArgumentParser(
        description="Vehicle Speed Estimation using Inference and Supervision"
    )

    # Model selection and credentials: both may be omitted on the command line.
    cli.add_argument(
        "--model_id", type=str, default="yolov8x-640", help="Roboflow model ID"
    )
    cli.add_argument(
        "--roboflow_api_key", type=str, default=None, help="Roboflow API KEY"
    )

    # Input / output videos: both are mandatory.
    cli.add_argument(
        "--source_video_path",
        type=str,
        required=True,
        help="Path to the source video file",
    )
    cli.add_argument(
        "--target_video_path",
        type=str,
        required=True,
        help="Path to the target video file (output)",
    )

    # Detection thresholds.
    cli.add_argument(
        "--confidence_threshold",
        type=float,
        default=0.3,
        help="Confidence threshold for the model",
    )
    cli.add_argument(
        "--iou_threshold",
        type=float,
        default=0.7,
        help="IOU threshold for the model",
    )

    return cli.parse_args()


if __name__ == "__main__":
args = parse_arguments()

api_key = args.roboflow_api_key
def main(
source_video_path: str,
target_video_path: str,
model_id: str = "yolov8x-640",
roboflow_api_key: str | None = None,
confidence_threshold: float = 0.3,
iou_threshold: float = 0.7,
):
"""
Vehicle Speed Estimation using Inference and Supervision.

Args:
source_video_path: Path to the source video file
target_video_path: Path to the target video file (output)
model_id: Roboflow model ID
roboflow_api_key: Roboflow API KEY
confidence_threshold: Confidence threshold for the model
iou_threshold: IOU threshold for the model
"""
api_key = roboflow_api_key
api_key = os.environ.get("ROBOFLOW_API_KEY", api_key)
if api_key is None:
raise ValueError(
"Roboflow API key is missing. Please provide it as an argument or set the "
"ROBOFLOW_API_KEY environment variable."
)
args.roboflow_api_key = api_key
roboflow_api_key = api_key

video_info = sv.VideoInfo.from_video_path(video_path=args.source_video_path)
model = get_roboflow_model(model_id=args.model_id, api_key=args.roboflow_api_key)
video_info = sv.VideoInfo.from_video_path(video_path=source_video_path)
model = get_roboflow_model(model_id=model_id, api_key=roboflow_api_key)

byte_track = sv.ByteTrack(
frame_rate=video_info.fps, track_activation_threshold=args.confidence_threshold
frame_rate=video_info.fps, track_activation_threshold=confidence_threshold
)

thickness = sv.calculate_optimal_line_thickness(
Expand All @@ -114,20 +88,20 @@ def parse_arguments() -> argparse.Namespace:
position=sv.Position.BOTTOM_CENTER,
)

frame_generator = sv.get_video_frames_generator(source_path=args.source_video_path)
frame_generator = sv.get_video_frames_generator(source_path=source_video_path)

polygon_zone = sv.PolygonZone(polygon=SOURCE)
view_transformer = ViewTransformer(source=SOURCE, target=TARGET)

coordinates = defaultdict(lambda: deque(maxlen=video_info.fps))

with sv.VideoSink(args.target_video_path, video_info) as sink:
with sv.VideoSink(target_video_path, video_info) as sink:
for frame in frame_generator:
results = model.infer(frame)[0]
# Roboflow Inference names the NMS overlap parameter `iou_threshold`.
# An unrecognised `iou=` keyword would be swallowed by the model's
# `**kwargs` and the user-supplied value would never reach NMS.
# NOTE(review): confirm the keyword against the installed `inference` version.
results = model.infer(
    frame, confidence=confidence_threshold, iou_threshold=iou_threshold
)[0]
detections = sv.Detections.from_inference(results)
detections = detections[detections.confidence > args.confidence_threshold]
detections = detections[polygon_zone.trigger(detections)]
detections = detections.with_nms(threshold=args.iou_threshold)
detections = byte_track.update_with_detections(detections=detections)

points = detections.get_anchors_coordinates(
Expand Down Expand Up @@ -166,3 +140,10 @@ def parse_arguments() -> argparse.Namespace:
if cv2.waitKey(1) & 0xFF == ord("q"):
break
cv2.destroyAllWindows()


if __name__ == "__main__":
    # Imported inside the guard so jsonargparse is only needed when this file
    # is executed as a script.
    import jsonargparse

    # NOTE(review): presumably lets optional parameters also be supplied
    # positionally - confirm against the jsonargparse documentation.
    jsonargparse.set_parsing_settings(parse_optionals_as_positionals=True)
    # Build the command-line interface from the signature of `main`.
    jsonargparse.auto_cli(main, as_positional=False)
1 change: 1 addition & 0 deletions examples/speed_estimation/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ requests
ultralytics
super-gradients==3.5.0
inference
jsonargparse[signatures]
67 changes: 27 additions & 40 deletions examples/speed_estimation/ultralytics_example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import argparse
from collections import defaultdict, deque

import cv2
Expand Down Expand Up @@ -37,43 +36,26 @@ def transform_points(self, points: np.ndarray) -> np.ndarray:
return transformed_points.reshape(-1, 2)


def parse_arguments() -> argparse.Namespace:
    """Parse the command-line options of the Ultralytics speed-estimation example.

    Returns:
        Namespace carrying ``source_video_path``, ``target_video_path``,
        ``confidence_threshold`` and ``iou_threshold``.
    """
    # Options are declared as data and registered in order, so the generated
    # help text lists them in the same sequence as the table below.
    option_table = (
        (
            "--source_video_path",
            {"required": True, "help": "Path to the source video file", "type": str},
        ),
        (
            "--target_video_path",
            {
                "required": True,
                "help": "Path to the target video file (output)",
                "type": str,
            },
        ),
        (
            "--confidence_threshold",
            {
                "default": 0.3,
                "help": "Confidence threshold for the model",
                "type": float,
            },
        ),
        (
            "--iou_threshold",
            {"default": 0.7, "help": "IOU threshold for the model", "type": float},
        ),
    )

    cli = argparse.ArgumentParser(
        description="Vehicle Speed Estimation using Ultralytics and Supervision"
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    return cli.parse_args()


if __name__ == "__main__":
args = parse_arguments()

video_info = sv.VideoInfo.from_video_path(video_path=args.source_video_path)
def main(
source_video_path: str,
target_video_path: str,
confidence_threshold: float = 0.3,
iou_threshold: float = 0.7,
):
"""
Vehicle Speed Estimation using Ultralytics and Supervision.

Args:
source_video_path: Path to the source video file
target_video_path: Path to the target video file (output)
confidence_threshold: Confidence threshold for the model
iou_threshold: IOU threshold for the model
"""
video_info = sv.VideoInfo.from_video_path(video_path=source_video_path)
model = YOLO("yolo11x.pt")

byte_track = sv.ByteTrack(
frame_rate=video_info.fps, track_activation_threshold=args.confidence_threshold
frame_rate=video_info.fps, track_activation_threshold=confidence_threshold
)

thickness = sv.calculate_optimal_line_thickness(
Expand All @@ -92,20 +74,18 @@ def parse_arguments() -> argparse.Namespace:
position=sv.Position.BOTTOM_CENTER,
)

frame_generator = sv.get_video_frames_generator(source_path=args.source_video_path)
frame_generator = sv.get_video_frames_generator(source_path=source_video_path)

polygon_zone = sv.PolygonZone(polygon=SOURCE)
view_transformer = ViewTransformer(source=SOURCE, target=TARGET)

coordinates = defaultdict(lambda: deque(maxlen=video_info.fps))

with sv.VideoSink(args.target_video_path, video_info) as sink:
with sv.VideoSink(target_video_path, video_info) as sink:
for frame in frame_generator:
result = model(frame)[0]
result = model(frame, conf=confidence_threshold, iou=iou_threshold)[0]
detections = sv.Detections.from_ultralytics(result)
detections = detections[detections.confidence > args.confidence_threshold]
detections = detections[polygon_zone.trigger(detections)]
detections = detections.with_nms(threshold=args.iou_threshold)
detections = byte_track.update_with_detections(detections=detections)

points = detections.get_anchors_coordinates(
Expand Down Expand Up @@ -144,3 +124,10 @@ def parse_arguments() -> argparse.Namespace:
if cv2.waitKey(1) & 0xFF == ord("q"):
break
cv2.destroyAllWindows()


if __name__ == "__main__":
    # Imported inside the guard so jsonargparse is only needed when this file
    # is executed as a script.
    import jsonargparse

    # NOTE(review): presumably lets optional parameters also be supplied
    # positionally - confirm against the jsonargparse documentation.
    jsonargparse.set_parsing_settings(parse_optionals_as_positionals=True)
    # Build the command-line interface from the signature of `main`.
    jsonargparse.auto_cli(main, as_positional=False)
68 changes: 29 additions & 39 deletions examples/speed_estimation/yolo_nas_example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import argparse
from collections import defaultdict, deque

import cv2
Expand Down Expand Up @@ -38,43 +37,26 @@ def transform_points(self, points: np.ndarray) -> np.ndarray:
return transformed_points.reshape(-1, 2)


def parse_arguments() -> argparse.Namespace:
    """Parse the command-line options of the YOLO-NAS speed-estimation example.

    Returns:
        Namespace carrying ``source_video_path``, ``target_video_path``,
        ``confidence_threshold`` and ``iou_threshold``.
    """
    cli = argparse.ArgumentParser(
        description="Vehicle Speed Estimation using YOLO-NAS and Supervision"
    )

    # Input / output videos: both are mandatory.
    cli.add_argument(
        "--source_video_path",
        type=str,
        required=True,
        help="Path to the source video file",
    )
    cli.add_argument(
        "--target_video_path",
        type=str,
        required=True,
        help="Path to the target video file (output)",
    )

    # Detection thresholds.
    cli.add_argument(
        "--confidence_threshold",
        type=float,
        default=0.3,
        help="Confidence threshold for the model",
    )
    cli.add_argument(
        "--iou_threshold",
        type=float,
        default=0.7,
        help="IOU threshold for the model",
    )

    return cli.parse_args()


if __name__ == "__main__":
args = parse_arguments()

video_info = sv.VideoInfo.from_video_path(video_path=args.source_video_path)
def main(
source_video_path: str,
target_video_path: str,
confidence_threshold: float = 0.3,
iou_threshold: float = 0.7,
):
"""
Vehicle Speed Estimation using YOLO-NAS and Supervision.

Args:
source_video_path: Path to the source video file
target_video_path: Path to the target video file (output)
confidence_threshold: Confidence threshold for the model
iou_threshold: IOU threshold for the model
"""
video_info = sv.VideoInfo.from_video_path(video_path=source_video_path)
model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco")

byte_track = sv.ByteTrack(
frame_rate=video_info.fps, track_activation_threshold=args.confidence_threshold
frame_rate=video_info.fps, track_activation_threshold=confidence_threshold
)

thickness = sv.calculate_optimal_line_thickness(
Expand All @@ -93,19 +75,20 @@ def parse_arguments() -> argparse.Namespace:
position=sv.Position.BOTTOM_CENTER,
)

frame_generator = sv.get_video_frames_generator(source_path=args.source_video_path)
frame_generator = sv.get_video_frames_generator(source_path=source_video_path)

polygon_zone = sv.PolygonZone(polygon=SOURCE)
view_transformer = ViewTransformer(source=SOURCE, target=TARGET)

coordinates = defaultdict(lambda: deque(maxlen=video_info.fps))

with sv.VideoSink(args.target_video_path, video_info) as sink:
with sv.VideoSink(target_video_path, video_info) as sink:
for frame in frame_generator:
result = model.predict(frame)[0]
result = model.predict(frame, conf=confidence_threshold, iou=iou_threshold)[
0
]
detections = sv.Detections.from_yolo_nas(result)
detections = detections[polygon_zone.trigger(detections)]
detections = detections.with_nms(threshold=args.iou_threshold)
detections = byte_track.update_with_detections(detections=detections)

points = detections.get_anchors_coordinates(
Expand Down Expand Up @@ -144,3 +127,10 @@ def parse_arguments() -> argparse.Namespace:
if cv2.waitKey(1) & 0xFF == ord("q"):
break
cv2.destroyAllWindows()


if __name__ == "__main__":
    # Imported inside the guard so jsonargparse is only needed when this file
    # is executed as a script.
    import jsonargparse

    # NOTE(review): presumably lets optional parameters also be supplied
    # positionally - confirm against the jsonargparse documentation.
    jsonargparse.set_parsing_settings(parse_optionals_as_positionals=True)
    # Build the command-line interface from the signature of `main`.
    jsonargparse.auto_cli(main, as_positional=False)