Skip to content

Commit ef3b465

Browse files
authored
C++ Demo - Object Tracking (VitTrack) (#240)
* Preliminary attempt at C++ demo. * Update README documentation * Fixed text spacing and removed unused arguments. Cleaned up to not use tuple. * Update offsets to match C++ to prevent overlapping text * Add help functionality * Add using namespace for standalone C++ demo file for readability. * Update formatting and add save/visualization functionality * More formatting changes
1 parent a45b893 commit ef3b465

File tree

4 files changed

+287
-11
lines changed

4 files changed

+287
-11
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
cmake_minimum_required(VERSION 3.24)
set(project_name "opencv_zoo_object_tracking_vittrack")

PROJECT (${project_name})

# Set C++ compilation standard to C++11.
# This must happen BEFORE add_executable(): CMAKE_CXX_STANDARD only initializes
# the CXX_STANDARD property of targets that are created after it is set.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Find OpenCV, you may need to set OpenCV_DIR variable
# to the absolute path to the directory containing OpenCVConfig.cmake file
# via the command line or GUI.
# Alternatively pass -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation,
# which is forwarded to find_package() as a search hint.
set(OPENCV_VERSION "4.9.0")
set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")
find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})

# Sources of the demo executable
file(GLOB SourceFile
    "demo.cpp")

# If the package has been found, several variables will
# be set, you can find the full list with descriptions
# in the OpenCVConfig.cmake file.
# Print some message showing some of them
message(STATUS "OpenCV library status:")
message(STATUS "    config: ${OpenCV_DIR}")
message(STATUS "    version: ${OpenCV_VERSION}")
message(STATUS "    libraries: ${OpenCV_LIBS}")
message(STATUS "    include path: ${OpenCV_INCLUDE_DIRS}")

# Declare the executable target built from your sources
add_executable(${project_name} ${SourceFile})

# Link your application with OpenCV libraries
target_link_libraries(${project_name} PRIVATE ${OpenCV_LIBS})

models/object_tracking_vittrack/README.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,34 @@ This model is contributed by [Pengyu Liu](https://github.com/lpylpy0514) in GSoC
1111
**NOTE: OpenCV > 4.8.0 is required. Build from source with instructions from https://opencv.org/get-started/.**
1212

1313
# Demo
14-
14+
## Python
1515
```bash
16+
# tracking on camera input
17+
python demo.py
18+
1619
# tracking on video
1720
python demo.py --input /path/to/video
1821

1922
# get help regarding various parameters
2023
python demo.py --help
2124
```
25+
## C++
26+
Install the latest OpenCV (the provided CMakeLists.txt requires version 4.9.0 or newer) and CMake >= 3.24.0 to get started.
27+
28+
```shell
29+
# A typical and default installation path of OpenCV is /usr/local
30+
cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
31+
cmake --build build
32+
33+
# tracking on camera input
34+
./build/opencv_zoo_object_tracking_vittrack
35+
36+
# tracking on video
37+
./build/opencv_zoo_object_tracking_vittrack -i=/path/to/video
38+
39+
# get help messages
40+
./build/opencv_zoo_object_tracking_vittrack -h
41+
```
2242

2343
# Example outputs
2444

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
#include <iostream>
2+
#include <opencv2/opencv.hpp>
3+
4+
using namespace std;
5+
using namespace cv;
6+
using namespace dnn;
7+
8+
struct TrackingResult
9+
{
10+
bool isLocated;
11+
Rect bbox;
12+
float score;
13+
};
14+
15+
class VitTrack
16+
{
17+
public:
18+
19+
VitTrack(const string& model_path, int backend_id = 0, int target_id = 0)
20+
{
21+
params.net = model_path;
22+
params.backend = backend_id;
23+
params.target = target_id;
24+
model = TrackerVit::create(params);
25+
}
26+
27+
void init(const Mat& image, const Rect& roi)
28+
{
29+
model->init(image, roi);
30+
}
31+
32+
TrackingResult infer(const Mat& image)
33+
{
34+
TrackingResult result;
35+
result.isLocated = model->update(image, result.bbox);
36+
result.score = model->getTrackingScore();
37+
return result;
38+
}
39+
40+
private:
41+
TrackerVit::Params params;
42+
Ptr<TrackerVit> model;
43+
};
44+
45+
/**
 * Draws the tracking result on a copy of the input frame.
 *
 * @param image       frame to annotate; it is cloned, the input is not modified
 * @param bbox        bounding box to draw when the target is considered found
 * @param score       tracking score; the box is drawn only if it is >= 0.3
 * @param isLocated   tracker status for this frame
 * @param fps         frames per second to print in the top-left corner;
 *                    a negative value disables the FPS label
 * @param box_color   color of the bounding box
 * @param text_color  color of the FPS and score labels
 * @param fontScale   font scale passed to putText / getTextSize
 * @param fontSize    text thickness passed to putText / getTextSize
 * @return the annotated copy of the frame
 */
Mat visualize(const Mat& image, const Rect& bbox, float score, bool isLocated, double fps = -1.0,
              const Scalar& box_color = Scalar(0, 255, 0), const Scalar& text_color = Scalar(0, 255, 0),
              double fontScale = 1.0, int fontSize = 1)
{
    // Minimum score for which the target is still reported as tracked.
    const double kMinScore = 0.3;

    Mat output = image.clone();
    const int h = output.rows;
    const int w = output.cols;

    if (fps >= 0)
    {
        // Two decimals, like the score label below; to_string() would print six.
        putText(output, format("FPS: %.2f", fps), Point(0, 30), FONT_HERSHEY_DUPLEX, fontScale, text_color, fontSize);
    }

    if (isLocated && score >= kMinScore)
    {
        rectangle(output, bbox, box_color, 2);
        putText(output, format("%.2f", score), Point(bbox.x, bbox.y + 25),
                FONT_HERSHEY_DUPLEX, fontScale, text_color, fontSize);
    }
    else
    {
        // Place the warning around the center of the frame.
        const string lost_text = "Target lost!";
        const Size text_size = getTextSize(lost_text, FONT_HERSHEY_DUPLEX, fontScale, fontSize, nullptr);
        const int text_x = (w - text_size.width) / 2;
        const int text_y = (h - text_size.height) / 2;
        putText(output, lost_text, Point(text_x, text_y), FONT_HERSHEY_DUPLEX, fontScale, Scalar(0, 0, 255), fontSize);
    }

    return output;
}
74+
75+
int main(int argc, char** argv)
76+
{
77+
CommandLineParser parser(argc, argv,
78+
"{help h | | Print help message. }"
79+
"{input i | |Set path to the input video. Omit for using default camera.}"
80+
"{model_path |object_tracking_vittrack_2023sep.onnx |Set model path}"
81+
"{backend_target bt |0 |Choose backend-target pair: 0 - OpenCV implementation + CPU, 1 - CUDA + GPU (CUDA), 2 - CUDA + GPU (CUDA FP16), 3 - TIM-VX + NPU, 4 - CANN + NPU}"
82+
"{save s |false |Specify to save a file with results.}"
83+
"{vis v |true |Specify to open a new window to show results.}");
84+
if (parser.has("help"))
85+
{
86+
parser.printMessage();
87+
return 0;
88+
}
89+
90+
string input = parser.get<string>("input");
91+
string model_path = parser.get<string>("model_path");
92+
int backend_target = parser.get<int>("backend_target");
93+
bool save = parser.get<bool>("save");
94+
bool vis = parser.get<bool>("vis");
95+
96+
vector<vector<int>> backend_target_pairs =
97+
{
98+
{DNN_BACKEND_OPENCV, DNN_TARGET_CPU},
99+
{DNN_BACKEND_CUDA, DNN_TARGET_CUDA},
100+
{DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16},
101+
{DNN_BACKEND_TIMVX, DNN_TARGET_NPU},
102+
{DNN_BACKEND_CANN, DNN_TARGET_NPU}
103+
};
104+
105+
int backend_id = backend_target_pairs[backend_target][0];
106+
int target_id = backend_target_pairs[backend_target][1];
107+
108+
// Create VitTrack tracker
109+
VitTrack tracker(model_path, backend_id, target_id);
110+
111+
// Open video capture
112+
VideoCapture video;
113+
if (input.empty())
114+
{
115+
video.open(0); // Default camera
116+
}
117+
else
118+
{
119+
video.open(input);
120+
}
121+
122+
if (!video.isOpened())
123+
{
124+
cerr << "Error: Could not open video source" << endl;
125+
return -1;
126+
}
127+
128+
// Select an object
129+
Mat first_frame;
130+
video >> first_frame;
131+
132+
if (first_frame.empty())
133+
{
134+
cerr << "No frames grabbed!" << endl;
135+
return -1;
136+
}
137+
138+
Mat first_frame_copy = first_frame.clone();
139+
putText(first_frame_copy, "1. Drag a bounding box to track.", Point(0, 25), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0));
140+
putText(first_frame_copy, "2. Press ENTER to confirm", Point(0, 50), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0));
141+
Rect roi = selectROI("VitTrack Demo", first_frame_copy);
142+
143+
if (roi.area() == 0)
144+
{
145+
cerr << "No ROI is selected! Exiting..." << endl;
146+
return -1;
147+
}
148+
else
149+
{
150+
cout << "Selected ROI: " << roi << endl;
151+
}
152+
153+
// Create VideoWriter if save option is specified
154+
VideoWriter output_video;
155+
if (save)
156+
{
157+
Size frame_size = first_frame.size();
158+
output_video.open("output.mp4", VideoWriter::fourcc('m', 'p', '4', 'v'), video.get(CAP_PROP_FPS), frame_size);
159+
if (!output_video.isOpened())
160+
{
161+
cerr << "Error: Could not create output video stream" << endl;
162+
return -1;
163+
}
164+
}
165+
166+
// Initialize tracker with ROI
167+
tracker.init(first_frame, roi);
168+
169+
// Track frame by frame
170+
TickMeter tm;
171+
while (waitKey(1) < 0)
172+
{
173+
video >> first_frame;
174+
if (first_frame.empty())
175+
{
176+
cout << "End of video" << endl;
177+
break;
178+
}
179+
180+
// Inference
181+
tm.start();
182+
TrackingResult result = tracker.infer(first_frame);
183+
tm.stop();
184+
185+
// Visualize
186+
Mat frame = first_frame.clone();
187+
frame = visualize(frame, result.bbox, result.score, result.isLocated, tm.getFPS());
188+
189+
if (save)
190+
{
191+
output_video.write(frame);
192+
}
193+
194+
if (vis)
195+
{
196+
imshow("VitTrack Demo", frame);
197+
}
198+
tm.reset();
199+
}
200+
201+
if (save)
202+
{
203+
output_video.release();
204+
}
205+
206+
video.release();
207+
destroyAllWindows();
208+
209+
return 0;
210+
}

models/object_tracking_vittrack/demo.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,11 @@
3535
{:d}: TIM-VX + NPU,
3636
{:d}: CANN + NPU
3737
'''.format(*[x for x in range(len(backend_target_pairs))]))
38-
parser.add_argument('--save', '-s', action='store_true',
39-
help='Usage: Specify to save a file with results. Invalid in case of camera input.')
40-
parser.add_argument('--vis', '-v', action='store_true',
41-
help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
38+
parser.add_argument('--save', '-s', action='store_true', default=False,
39+
help='Usage: Specify to save a file with results.')
40+
parser.add_argument('--vis', '-v', action='store_true', default=True,
41+
help='Usage: Specify to open a new window to show results.')
4242
args = parser.parse_args()
43-
4443
def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),text_color=(0, 255, 0), fontScale = 1, fontSize = 1):
4544
output = image.copy()
4645
h, w, _ = output.shape
@@ -80,16 +79,21 @@ def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),tex
8079
print('No frames grabbed!')
8180
exit()
8281
first_frame_copy = first_frame.copy()
83-
cv.putText(first_frame_copy, "1. Drag a bounding box to track.", (0, 15), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
84-
cv.putText(first_frame_copy, "2. Press ENTER to confirm", (0, 35), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
85-
roi = cv.selectROI('vitTrack Demo', first_frame_copy)
82+
cv.putText(first_frame_copy, "1. Drag a bounding box to track.", (0, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
83+
cv.putText(first_frame_copy, "2. Press ENTER to confirm", (0, 50), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
84+
roi = cv.selectROI('VitTrack Demo', first_frame_copy)
8685

8786
if np.all(np.array(roi) == 0):
88-
print("No roi is selected! Exiting ...")
87+
print("No ROI is selected! Exiting ...")
8988
exit()
9089
else:
9190
print("Selected ROI: {}".format(roi))
9291

92+
if args.save:
93+
fps = video.get(cv.CAP_PROP_FPS)
94+
frame_size = (first_frame.shape[1], first_frame.shape[0])
95+
output_video = cv.VideoWriter('output.mp4', cv.VideoWriter_fourcc(*'mp4v'), fps, frame_size)
96+
9397
# Init tracker with ROI
9498
model.init(first_frame, roi)
9599

@@ -106,5 +110,15 @@ def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),tex
106110
tm.stop()
107111
# Visualize
108112
frame = visualize(frame, bbox, score, isLocated, fps=tm.getFPS())
109-
cv.imshow('VitTrack Demo', frame)
113+
if args.save:
114+
output_video.write(frame)
115+
116+
if args.vis:
117+
cv.imshow('VitTrack Demo', frame)
110118
tm.reset()
119+
120+
if args.save:
121+
output_video.release()
122+
123+
video.release()
124+
cv.destroyAllWindows()

0 commit comments

Comments
 (0)