Skip to content
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ cython_debug/
*.jpg
*.yaml
!.github/workflows/*.yaml
!ethogram/*.json
*.csv
*.txt

Expand Down
1 change: 1 addition & 0 deletions ethogram/yolo_equiv.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"horse": "zebra"}
1 change: 1 addition & 0 deletions ethogram/yolo_labels.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["zebra", "horse", "giraffe", "baboon"]
27 changes: 24 additions & 3 deletions src/kabr_tools/detector2cvat.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import json
import argparse
import cv2
from tqdm import tqdm
Expand All @@ -8,13 +9,16 @@
from kabr_tools.utils.draw import Draw


def detector2cvat(path_to_videos: str, path_to_save: str, model: str, show: bool) -> None:
def detector2cvat(path_to_videos: str, path_to_save: str,
target_labels: list, label_map: dict, model: str, show: bool) -> None:
"""
Detect objects with Ultralytics YOLO detections, apply SORT tracking and convert tracks to CVAT format.

Parameters:
path_to_videos - str. Path to the folder containing videos.
path_to_save - str. Path to the folder to save output xml & mp4 files.
target_labels - list. List of target labels to detect.
label_map - dict. Dictionary to rename labels.
model - str. YOLO model to use with detections.
show - bool. Flag to display detector's visualization.
"""
Expand All @@ -30,7 +34,7 @@ def detector2cvat(path_to_videos: str, path_to_save: str, model: str, show: bool

videos.append(f"{root}/{file}")

yolo = YOLOv8(weights=model, imgsz=3840, conf=0.5)
yolo = YOLOv8(weights=model, imgsz=3840, conf=0.5, target_labels=target_labels, label_map=label_map)

for i, video in enumerate(videos):
try:
Expand Down Expand Up @@ -121,6 +125,16 @@ def parse_args() -> argparse.Namespace:
help="path to save output xml & mp4 files",
required=True
)
local_parser.add_argument(
"--target_labels",
type=str,
help="path to target labels json"
)
local_parser.add_argument(
"--label_map",
type=str,
help="path to label map json"
)
local_parser.add_argument(
"--yolo",
type=str,
Expand All @@ -134,10 +148,17 @@ def parse_args() -> argparse.Namespace:
)
return local_parser.parse_args()

def load_json(file: str) -> "dict | list | None":
    """
    Load a json document from disk when a path is supplied.

    Parameters:
    file - str. Path to the json file. May be None or an empty string,
           e.g. when the corresponding command line option was omitted.

    Returns:
    The decoded json content (a list for a labels file, a dict for a
    label map file), or None when no path was given.
    """
    # Guard clause: no path means "use the defaults downstream".
    if not file:
        return None

    # Use a distinct name for the handle so the path argument is not shadowed.
    with open(file, mode="r", encoding="utf-8") as handle:
        return json.load(handle)

def main() -> None:
    """
    Command line entry point: parse arguments, load the optional label
    configuration files and run the detector once.
    """
    args = parse_args()

    # --target_labels / --label_map are optional; an omitted option is
    # forwarded as None.
    target_labels = load_json(args.target_labels)
    label_map = load_json(args.label_map)

    # Single call using the six-parameter signature (the earlier
    # four-argument call no longer matches detector2cvat and was dropped).
    detector2cvat(args.video, args.save, target_labels, label_map, args.yolo, args.imshow)


if __name__ == "__main__":
Expand Down
26 changes: 19 additions & 7 deletions src/kabr_tools/utils/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,23 @@
from ultralytics import YOLO

class YOLOv8:
def __init__(self, weights="yolov8x.pt",
             imgsz=640, conf=0.5,
             target_labels=None, label_map=None):
    """
    Wrap an Ultralytics YOLO model together with label filtering settings.

    Parameters:
    weights - str. Weights file passed to YOLO().
    imgsz - int. Image size setting stored on the instance.
    conf - float. Confidence threshold; forward() keeps detections whose
           confidence is strictly greater than this value.
    target_labels - list. Class names to keep. None or empty selects the
                    default ["zebra", "horse", "giraffe"].
    label_map - dict. Renames applied to kept class names. None or empty
                selects the default {"horse": "zebra"}.
    """
    self.conf = conf
    self.imgsz = imgsz
    self.model = YOLO(weights)
    # Class id -> class name table reported by the loaded model.
    self.names: dict = self.model.names

    # Fresh default objects are built per instance, so instances never
    # share (and cannot accidentally mutate) a common default container.
    self.target_labels = target_labels if target_labels else ["zebra", "horse", "giraffe"]
    self.label_map = label_map if label_map else {"horse": "zebra"}

def forward(self, image):
width = image.shape[1]
Expand All @@ -17,18 +29,18 @@ def forward(self, image):

for box, label, confidence in zip(boxes.xyxyn.numpy(), boxes.cls.numpy(), boxes.conf.numpy()):
if confidence > self.conf:
if self.names[label] in ["zebra", "horse", "giraffe"]:
if self.names[label] in self.target_labels:
box[0] = int(box[0] * width)
box[1] = int(box[1] * height)
box[2] = int(box[2] * width)
box[3] = int(box[3] * height)
box = box.astype(np.int32)
confidence = float(f"{confidence:.2f}")

if self.names[label] == "horse":
label = "Zebra"
else:
label = self.names[label].capitalize()
label = self.names[label]
if label in self.label_map:
label = self.label_map[label]
label = label.capitalize()

filtered.append(([box[0], box[1], box[2], box[3]], confidence, label))

Expand Down
10 changes: 10 additions & 0 deletions tests/test_detector2cvat.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ def setUp(self):
self.tool = "detector2cvat.py"
self.video = TestDetector2Cvat.dir
self.save = "tests/detector2cvat"
self.target_labels = "ethogram/yolo_labels.json"
self.label_map = "ethogram/yolo_equiv.json"
self.yolo = "yolov5s.pt"
self.dir = "/".join(os.path.splitext(self.video)[0].split('/')[-2:])

Expand Down Expand Up @@ -322,6 +324,10 @@ def test_parse_arg_min(self):
self.assertEqual(args.video, self.video)
self.assertEqual(args.save, self.save)

# check default argument values
self.assertEqual(args.target_labels, None)
self.assertEqual(args.label_map, None)

# check default argument values
self.assertEqual(args.yolo, "yolov8x.pt")
self.assertEqual(args.imshow, False)
Expand All @@ -332,13 +338,17 @@ def test_parse_arg_full(self, imshow):
sys.argv = [self.tool,
"--video", self.video,
"--save", self.save,
"--target_labels", self.target_labels,
"--label_map", self.label_map,
"--yolo", self.yolo,
"--imshow"]
args = detector2cvat.parse_args()

# check parsed argument values
self.assertEqual(args.video, self.video)
self.assertEqual(args.save, self.save)
self.assertEqual(args.target_labels, self.target_labels)
self.assertEqual(args.label_map, self.label_map)
self.assertEqual(args.yolo, self.yolo)
self.assertEqual(args.imshow, True)

Expand Down
139 changes: 139 additions & 0 deletions tests/test_yolo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import unittest
from unittest.mock import MagicMock, patch
from collections import OrderedDict
import numpy as np
import torch
from ultralytics import YOLO
from kabr_tools.utils.yolo import YOLOv8

# Class name -> numeric class id, copied from the names table of yolov8x.pt.
# MockBox.mock() uses it to turn readable class names into the ids stored
# in the mocked `cls` tensor.
LABELS = {"zebra": 22, "horse": 17, "giraffe": 23, "bear": 21}

def rescale(box, width, height):
    """
    Scale a box given as [x1, y1, x2, y2]: x entries are multiplied by
    width, y entries by height. Returns a new 4-element list.
    """
    # Per-position multiplier: x, y, x, y.
    factors = (width, height, width, height)
    return [box[position] * factor for position, factor in enumerate(factors)]


class MockBox:
    """
    Lightweight stand-in for the detection boxes object consumed by
    YOLOv8.forward(); it exposes the `xyxyn`, `cls` and `conf` attributes.
    """

    def __init__(self, box=None, cls=None, conf=None):
        # The constructor arguments are accepted for compatibility but are
        # not used: every attribute starts as None and is filled by mock().
        # None defaults replace the previous mutable list defaults.
        self.xyxyn = None
        self.cls = None
        self.conf = None

    def mock(self, boxes, classes, confs):
        """
        Populate the mock with tensors and return self for chaining.

        Parameters:
        boxes - list. One coordinate list per detection.
        classes - list. Class names; each must be a key of LABELS.
        confs - list. One confidence value per detection.
        """
        self.xyxyn = torch.Tensor(boxes)
        # Translate readable class names to the numeric ids stored in `cls`.
        self.cls = torch.Tensor([LABELS[name] for name in classes])
        self.conf = torch.Tensor(confs)
        return self


class TestYolo(unittest.TestCase):
    """
    Tests for the YOLOv8 wrapper: confidence filtering, target label
    filtering, label renaming and centroid computation. The YOLO name inside
    kabr_tools.utils.yolo is patched so forward() sees controlled detections.
    """

    @classmethod
    def setUpClass(cls):
        # Blank 100 (height) x 101 (width) image; the differing sizes make an
        # accidental swap of width and height visible in rescaled boxes.
        cls.im = np.zeros((100, 101, 3), dtype=np.uint8)
        cls.box = OrderedDict([("x1", 10), ("y1", 20), ("x2", 30), ("y2", 40)])
        cls.box_values = list(cls.box.values())

    @staticmethod
    def _install_model(yolo_mock):
        """Wire a MagicMock model into the patched YOLO class and return it."""
        yolo_model = MagicMock()
        # Indexing the predict() result returns the result itself.
        yolo_model.predict.return_value.__getitem__ = lambda x, _: x
        # The class id -> name table comes from the real (unpatched)
        # ultralytics YOLO imported by this test module.
        # NOTE(review): this presumably loads the actual yolov8x.pt weights
        # in every test - confirm that is acceptable for the test environment.
        yolo_model.names = YOLO("yolov8x.pt").names
        yolo_mock.return_value = yolo_model
        return yolo_model

    @staticmethod
    def _set_detections(yolo_model, points, labels, probs):
        """Make the mocked model report the given detections."""
        yolo_boxes = MockBox().mock(points, labels, probs)
        yolo_model.predict.return_value.boxes.cpu.return_value = yolo_boxes

    def _assert_predictions(self, preds, points, probs, expect_labels):
        """
        Check preds against the expected detections.

        expect_labels holds one entry per input detection: the expected
        output label, or None when that detection must be filtered out.
        Surviving detections are expected in input order.
        """
        im = TestYolo.im
        kept = [index for index, label in enumerate(expect_labels) if label is not None]

        self.assertEqual(len(preds), len(kept))
        for pred, index in zip(preds, kept):
            self.assertEqual(pred[0], rescale(points[index], im.shape[1], im.shape[0]))
            self.assertEqual(pred[1], probs[index])
            self.assertEqual(pred[2], expect_labels[index])

    @patch("kabr_tools.utils.yolo.YOLO")
    def test_forward(self, yolo_mock):
        im = TestYolo.im
        yolo_model = self._install_model(yolo_mock)

        # horse -> zebra
        points = [[0] * 4] * 3
        labels = ["zebra", "horse", "giraffe"]
        expect_labels = ["Zebra", "Zebra", "Giraffe"]
        probs = [0.7, 0.8, 0.9]
        self._set_detections(yolo_model, points, labels, probs)

        yolo = YOLOv8()
        preds = yolo.forward(im)
        self._assert_predictions(preds, points, probs, expect_labels)

        # bear -> filtered
        points = [[0] * 4] * 3
        labels = ["bear", "horse", "giraffe"]
        expect_labels = [None, "Zebra", "Giraffe"]
        probs = [0.9, 0.8, 0.9]
        self._set_detections(yolo_model, points, labels, probs)

        yolo = YOLOv8()
        preds = yolo.forward(im)
        self._assert_predictions(preds, points, probs, expect_labels)

        # low prob -> filtered
        points = [[i] * 4 for i in range(8)]
        labels = ["bear", "horse", "zebra", "giraffe", "bear", "horse", "zebra", "giraffe"]
        expect_labels = [None, "Zebra", None, "Giraffe", None, "Zebra", None, None]
        probs = [0.5, 0.9, 0.4, 0.8, 0.7, 0.6, 0.3, 0.5]
        self._set_detections(yolo_model, points, labels, probs)

        yolo = YOLOv8()
        preds = yolo.forward(im)
        self._assert_predictions(preds, points, probs, expect_labels)

    @patch("kabr_tools.utils.yolo.YOLO")
    def test_yolo_with_params(self, yolo_mock):
        im = TestYolo.im
        yolo_model = self._install_model(yolo_mock)

        # Custom targets, renames and a stricter confidence threshold.
        points = [[i] * 4 for i in range(8)]
        labels = ["bear", "horse", "zebra", "giraffe", "bear", "horse", "zebra", "giraffe"]
        expect_labels = ["Panda", "Fish", None, None, None, None, None, "Giraffe"]
        probs = [0.91, 0.99, 0.92, 0.55, 0.9, 0.89, 0.85, 0.93]
        self._set_detections(yolo_model, points, labels, probs)

        yolo = YOLOv8(weights="yolov8x.pt",
                      imgsz=640, conf=0.9,
                      target_labels=["bear", "horse", "giraffe"],
                      label_map={"bear": "panda", "horse": "fish"})
        preds = yolo.forward(im)
        self._assert_predictions(preds, points, probs, expect_labels)

    def test_get_centroid(self):
        box = TestYolo.box
        box_values = TestYolo.box_values
        x, y = YOLOv8.get_centroid(box_values)
        self.assertEqual(x, (box["x1"] + box["x2"]) // 2)
        self.assertEqual(y, (box["y1"] + box["y2"]) // 2)
Loading