Skip to content

Commit 9197f07

Browse files
authored
Merge pull request #497 from Nuzhny007/master
RF-DETR instance segmentation
2 parents 827b7fa + 9bb918f commit 9197f07

File tree

12 files changed

+427
-9
lines changed

12 files changed

+427
-9
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
[![CodeQL](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml)
66

77
## Latest Features
8+
- Instance segmentation model from RF-DETR detector works with TensorRT! Export pre-trained PyTorch models [here (roboflow/rf-detr)](https://github.com/roboflow/rf-detr) to ONNX format and run Multitarget-tracker with `-e=6` example
89
- New linear assignment algorithm - [Jonker-Volgenant / LAPJV algorithm](https://github.com/yongyanghz/LAPJV-algorithm-c) used in [scipy](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html) as an alternative to the Hungarian algorithm
910
- D-FINE detector works with TensorRT! Export pre-trained PyTorch models [here (Peterande/D-FINE)](https://github.com/Peterande/D-FINE) to ONNX format and run Multitarget-tracker with `-e=6` example
1011
- RF-DETR detector works with TensorRT! Export pre-trained PyTorch models [here (roboflow/rf-detr)](https://github.com/roboflow/rf-detr) to ONNX format and run Multitarget-tracker with `-e=6` example
@@ -20,6 +21,8 @@
2021
## Demo Videos
2122

2223
### Detection & Tracking
24+
25+
[![RF-DETR: detection vs instance segmentation](https://img.youtube.com/vi/oKy7jEKT83c/0.jpg)](https://youtu.be/oKy7jEKT83c)
2326
[![Satellite planes detection and tracking with YOLOv11-obb](https://img.youtube.com/vi/gTpWnkMF7Lg/0.jpg)](https://youtu.be/gTpWnkMF7Lg)
2427
[![4-in-1 latest SOTA detectors](https://img.youtube.com/vi/Pb_HnejRpY4/0.jpg)](https://youtu.be/Pb_HnejRpY4)
2528
[![YOLOv8-obb detection with rotated boxes](https://img.youtube.com/vi/1e6ur57Fhzs/0.jpg)](https://youtu.be/1e6ur57Fhzs)

data/settings_rfdetr_seg.ini

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
[detection]
2+
3+
#-----------------------------
4+
# opencv_dnn = 12
5+
# darknet_cudnn = 10
6+
# tensorrt = 11
7+
detector_backend = 11
8+
9+
#-----------------------------
10+
# Target and backend for opencv_dnn detector
11+
# DNN_TARGET_CPU
12+
# DNN_TARGET_OPENCL
13+
# DNN_TARGET_OPENCL_FP16
14+
# DNN_TARGET_MYRIAD
15+
# DNN_TARGET_CUDA
16+
# DNN_TARGET_CUDA_FP16
17+
ocv_dnn_target = DNN_TARGET_CPU
18+
19+
# DNN_BACKEND_DEFAULT
20+
# DNN_BACKEND_HALIDE
21+
# DNN_BACKEND_INFERENCE_ENGINE
22+
# DNN_BACKEND_OPENCV
23+
# DNN_BACKEND_VKCOM
24+
# DNN_BACKEND_CUDA
25+
# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH
26+
# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019
27+
ocv_dnn_backend = DNN_BACKEND_OPENCV
28+
29+
#-----------------------------
30+
nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/rfdetr_seg_coco.onnx
31+
nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/rfdetr_seg_coco.onnx
32+
class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco_91.names
33+
34+
#-----------------------------
35+
confidence_threshold = 0.5
36+
37+
max_crop_ratio = 0
38+
max_batch = 1
39+
gpu_id = 0
40+
41+
#-----------------------------
42+
# YOLOV3
43+
# YOLOV4
44+
# YOLOV5
45+
net_type = RFDETR_IS
46+
47+
#-----------------------------
48+
# INT8
49+
# FP16
50+
# FP32
51+
inference_precision = FP16
52+
53+
54+
[tracking]
55+
56+
#-----------------------------
57+
# DistCenters = 0 // Euclidean distance between centers, pixels
58+
# DistRects = 1 // Euclidean distance between bounding rectangles, pixels
59+
# DistJaccard = 2 // Intersection over Union, IoU, [0, 1]
60+
# DistHist = 3 // Bhattacharyya distance between histograms, [0, 1]
61+
62+
distance_type = 0
63+
64+
#-----------------------------
65+
# KalmanLinear = 0
66+
# KalmanUnscented = 1
67+
68+
kalman_type = 0
69+
70+
#-----------------------------
71+
# FilterCenter = 0
72+
# FilterRect = 1
73+
# FilterRRect = 2
74+
75+
filter_goal = 0
76+
77+
#-----------------------------
78+
# TrackNone = 0
79+
# TrackKCF = 1
80+
# TrackMIL = 2
81+
# TrackMedianFlow = 3
82+
# TrackGOTURN = 4
83+
# TrackMOSSE = 5
84+
# TrackCSRT = 6
85+
# TrackDAT = 7
86+
# TrackSTAPLE = 8
87+
# TrackLDES = 9
88+
# TrackDaSiamRPN = 10
89+
# Used if filter_goal == FilterRect
90+
91+
lost_track_type = 0
92+
93+
#-----------------------------
94+
# MatchHungrian = 0
95+
# MatchBipart = 1
96+
97+
match_type = 0
98+
99+
#-----------------------------
100+
# Use constant acceleration motion model:
101+
# 0 - unused (stable)
102+
# 1 - use acceleration in Kalman filter (experimental)
103+
use_aceleration = 0
104+
105+
#-----------------------------
106+
# Delta time for Kalman filter
107+
delta_time = 0.4
108+
109+
#-----------------------------
110+
# Accel noise magnitude for Kalman filter
111+
accel_noise = 0.2
112+
113+
#-----------------------------
114+
# Distance threshold between region and object on two frames
115+
dist_thresh = 0.8
116+
117+
#-----------------------------
118+
# If this value > 0 then a circle with this radius will be used
119+
# If this value <= 0 then an ellipse with size (3*vx, 3*vy) will be used, where vx and vy are the horizontal and vertical speed in pixels
120+
min_area_radius_pix = -1
121+
122+
#-----------------------------
123+
# Minimal area radius as a ratio of the object size. Used if min_area_radius_pix < 0
124+
min_area_radius_k = 0.8
125+
126+
#-----------------------------
127+
# If the object is not assigned for more than this many seconds, it will be removed
128+
max_lost_time = 2
129+
130+
#-----------------------------
131+
# The maximum trajectory length
132+
max_trace_len = 2
133+
134+
#-----------------------------
135+
# Detection abandoned objects
136+
detect_abandoned = 0
137+
# After this time (in seconds) the object is considered abandoned
138+
min_static_time = 5
139+
# After this time (in seconds) the abandoned object will be removed
140+
max_static_time = 25
141+
# Speed in pixels. If the speed of the object is more than this value, the object is considered non-static
142+
max_speed_for_static = 10

src/Detector/BaseDetector.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,17 +167,25 @@ class BaseDetector
167167
cv::Mat foreground(m_motionMap.size(), CV_8UC1, cv::Scalar(0, 0, 0));
168168
for (const auto& region : m_regions)
169169
{
170+
if (region.m_boxMask.empty())
171+
{
170172
#if (CV_VERSION_MAJOR < 4)
171-
cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), CV_FILLED);
173+
cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), CV_FILLED);
172174
#else
173-
cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), cv::FILLED);
175+
cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), cv::FILLED);
174176
#endif
177+
}
178+
else
179+
{
180+
cv::Rect brect = Clamp(cv::Rect(region.m_brect.x, region.m_brect.y, region.m_boxMask.cols, region.m_boxMask.rows), foreground.size());
181+
region.m_boxMask.copyTo(foreground(brect));
182+
}
175183
}
176184
if (!m_ignoreMask.empty())
177185
cv::bitwise_and(foreground, m_ignoreMask, foreground);
178186
cv::normalize(foreground, m_normFor, 255, 0, cv::NORM_MINMAX, m_motionMap.type());
179187

180-
double alpha = 0.95;
188+
double alpha = 0.9;
181189
cv::addWeighted(m_motionMap, alpha, m_normFor, 1 - alpha, 0, m_motionMap);
182190

183191
const int chans = frame.channels();

src/Detector/OCVDNNDetector.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ bool OCVDNNDetector::Init(const config_t& config)
169169
dictNetType["YOLOV11Mask"] = ModelType::YOLOV11Mask;
170170
dictNetType["YOLOV12"] = ModelType::YOLOV12;
171171
dictNetType["RFDETR"] = ModelType::RFDETR;
172+
dictNetType["RFDETR_IS"] = ModelType::RFDETR_IS;
172173
dictNetType["DFINE"] = ModelType::DFINE;
173174

174175
auto netType = dictNetType.find(net_type->second);
@@ -414,6 +415,10 @@ void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& cr
414415
ParseRFDETR(crop, detections, tmpRegions);
415416
break;
416417

418+
case ModelType::RFDETR_IS:
419+
ParseRFDETR_IS(crop, detections, tmpRegions);
420+
break;
421+
417422
case ModelType::DFINE:
418423
ParseDFINE(crop, detections, tmpRegions);
419424
break;
@@ -934,6 +939,70 @@ void OCVDNNDetector::ParseRFDETR(const cv::Rect& crop, std::vector<cv::Mat>& det
934939
}
935940
}
936941

942+
///
943+
/// \brief OCVDNNDetector::ParseRFDETR_IS
944+
/// \param crop
945+
/// \param detections
946+
/// \param tmpRegions
947+
///
948+
void OCVDNNDetector::ParseRFDETR_IS(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions)
949+
{
950+
int rows = detections[0].size[1];
951+
int dimensionsDets = detections[0].size[2];
952+
int dimensionsLabels = detections[1].size[2];
953+
954+
//0: name: input, size : 1x3x560x560
955+
//1: name: dets, size : 1x300x4
956+
//2: name: labels, size : 1x300x91
957+
958+
float* dets = (float*)detections[0].data;
959+
float* labels = (float*)detections[1].data;
960+
961+
float x_factor = crop.width / static_cast<float>(m_inWidth);
962+
float y_factor = crop.height / static_cast<float>(m_inHeight);
963+
964+
auto L2Conf = [](float v)
965+
{
966+
return 1.f / (1.f + std::exp(-v));
967+
};
968+
969+
for (int i = 0; i < rows; ++i)
970+
{
971+
float maxClassScore = L2Conf(labels[0]);
972+
size_t classId = 0;
973+
for (size_t cli = 1; cli < static_cast<size_t>(dimensionsLabels); ++cli)
974+
{
975+
auto conf = L2Conf(labels[cli]);
976+
if (maxClassScore < conf)
977+
{
978+
maxClassScore = conf;
979+
classId = cli;
980+
}
981+
}
982+
if (classId > 0)
983+
--classId;
984+
985+
if (maxClassScore > m_confidenceThreshold)
986+
{
987+
float x = dets[0];
988+
float y = dets[1];
989+
float w = dets[2];
990+
float h = dets[3];
991+
992+
int left = cvRound((x - 0.5f * w) * x_factor);
993+
int top = cvRound((y - 0.5f * h) * y_factor);
994+
995+
int width = cvRound(w * x_factor);
996+
int height = cvRound(h * y_factor);
997+
998+
if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList))
999+
tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast<float>(maxClassScore));
1000+
}
1001+
dets += dimensionsDets;
1002+
labels += dimensionsLabels;
1003+
}
1004+
}
1005+
9371006
///
9381007
/// \brief OCVDNNDetector::ParseDFINE
9391008
/// \param crop

src/Detector/OCVDNNDetector.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class OCVDNNDetector final : public BaseDetector
5050
YOLOV11Mask,
5151
YOLOV12,
5252
RFDETR,
53+
RFDETR_IS,
5354
DFINE
5455
};
5556

@@ -85,6 +86,7 @@ class OCVDNNDetector final : public BaseDetector
8586
void ParseYOLOv5_8_11_obb(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions);
8687
void ParseYOLOv5_8_11_seg(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions);
8788
void ParseRFDETR(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions);
89+
void ParseRFDETR_IS(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions);
8890
void ParseDFINE(const cv::Rect& crop, std::vector<cv::Mat>& detections, regions_t& tmpRegions);
8991
};
9092

src/Detector/YoloTensorRTDetector.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ bool YoloTensorRTDetector::Init(const config_t& config)
112112
dictNetType["YOLOV11Mask"] = tensor_rt::YOLOV11Mask;
113113
dictNetType["YOLOV12"] = tensor_rt::YOLOV12;
114114
dictNetType["RFDETR"] = tensor_rt::RFDETR;
115+
dictNetType["RFDETR_IS"] = tensor_rt::RFDETR_IS;
115116
dictNetType["DFINE"] = tensor_rt::DFINE;
116117

117118
auto netType = dictNetType.find(net_type->second);

0 commit comments

Comments
 (0)