Skip to content

Commit 62ba5d7

Browse files
dkurt authored and vpisarev committed
Added Halide OpenCL target for deep learning networks (#1246)
1 parent a4a8b84 commit 62ba5d7

18 files changed

+377
-222
lines changed

modules/dnn/include/opencv2/dnn/dnn.hpp

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,8 @@ namespace dnn //! This namespace is used for dnn module functionality.
6969
*/
7070
enum Target
7171
{
72-
DNN_TARGET_CPU
72+
DNN_TARGET_CPU,
73+
DNN_TARGET_OPENCL
7374
};
7475

7576
/** @brief Initialize dnn module and built-in layers.
@@ -138,6 +139,11 @@ namespace dnn //! This namespace is used for dnn module functionality.
138139

139140
virtual ~BackendWrapper(); //!< Virtual destructor to support polymorphic use.
140141

142+
/**
143+
* @brief Transfer data to CPU host memory.
144+
*/
145+
virtual void copyToHost() = 0;
146+
141147
int backendId; //!< Backend identifier.
142148
int targetId; //!< Target identifier.
143149
};
@@ -220,14 +226,16 @@ namespace dnn //! This namespace is used for dnn module functionality.
220226
* @param[in] node Backend node with Halide functions.
221227
* @param[in] inputs Blobs that will be used in forward invocations.
222228
* @param[in] outputs Blobs that will be used in forward invocations.
223-
* @see BackendNode
229+
* @param[in] targetId Target identifier
230+
* @see BackendNode, Target
224231
*
225232
* Layers don't use their own Halide::Func members because layer fusing may
226233
* have been applied. In that case the fused function should be scheduled.
227234
*/
228235
virtual void applyHalideScheduler(Ptr<BackendNode>& node,
229236
const std::vector<Mat*> &inputs,
230-
const std::vector<Mat> &outputs) const;
237+
const std::vector<Mat> &outputs,
238+
int targetId) const;
231239

232240
/**
233241
* @brief Implement layers fusing.
@@ -394,6 +402,13 @@ namespace dnn //! This namespace is used for dnn module functionality.
394402
*/
395403
void setPreferableBackend(int backendId);
396404

405+
/**
406+
* @brief Ask network to make computations on a specific target device.
407+
* @param[in] targetId target identifier.
408+
* @see Target
409+
*/
410+
void setPreferableTarget(int targetId);
411+
397412
/** @brief Sets the new value for the layer output blob
398413
* @param name descriptor of the updating layer output blob.
399414
* @param blob new blob.

modules/dnn/perf/perf_halide_net.cpp

Lines changed: 99 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -41,114 +41,131 @@ static void loadNet(std::string weights, std::string proto, std::string schedule
4141

4242
net->setInput(blobFromImage(input, 1.0, false));
4343
net->setPreferableBackend(DNN_BACKEND_HALIDE);
44+
net->setPreferableTarget(targetId);
4445
net->setHalideScheduler(scheduler);
4546
net->forward(outputLayer);
4647
}
4748

49+
////////////////////////////////////////////////////////////////////////////////
50+
// CPU target
51+
////////////////////////////////////////////////////////////////////////////////
4852
PERF_TEST(GoogLeNet, HalidePerfTest)
4953
{
50-
try {
51-
Net net;
52-
loadNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
53-
"", 227, 227, "prob", "caffe", DNN_TARGET_CPU, &net);
54-
55-
TEST_CYCLE_N(10)
56-
{
57-
net.forward();
58-
}
59-
SANITY_CHECK_NOTHING();
60-
} catch (SkipTestException& e) {
61-
throw PerfSkipTestException();
62-
}
54+
Net net;
55+
loadNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
56+
"", 227, 227, "prob", "caffe", DNN_TARGET_CPU, &net);
57+
TEST_CYCLE() net.forward();
58+
SANITY_CHECK_NOTHING();
6359
}
6460

6561
PERF_TEST(AlexNet, HalidePerfTest)
6662
{
67-
try {
68-
Net net;
69-
loadNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt",
70-
"dnn/halide_scheduler_alexnet.yml", 227, 227, "prob", "caffe",
71-
DNN_TARGET_CPU, &net);
72-
73-
TEST_CYCLE_N(10)
74-
{
75-
net.forward();
76-
}
77-
SANITY_CHECK_NOTHING();
78-
} catch (SkipTestException& e) {
79-
throw PerfSkipTestException();
80-
}
63+
Net net;
64+
loadNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt",
65+
"dnn/halide_scheduler_alexnet.yml", 227, 227, "prob", "caffe",
66+
DNN_TARGET_CPU, &net);
67+
TEST_CYCLE() net.forward();
68+
SANITY_CHECK_NOTHING();
8169
}
8270

8371
PERF_TEST(ResNet50, HalidePerfTest)
8472
{
85-
try {
86-
Net net;
87-
loadNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt",
88-
"dnn/halide_scheduler_resnet_50.yml", 224, 224, "prob", "caffe",
89-
DNN_TARGET_CPU, &net);
90-
91-
TEST_CYCLE_N(10)
92-
{
93-
net.forward();
94-
}
95-
SANITY_CHECK_NOTHING();
96-
} catch (SkipTestException& e) {
97-
throw PerfSkipTestException();
98-
}
73+
Net net;
74+
loadNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt",
75+
"dnn/halide_scheduler_resnet_50.yml", 224, 224, "prob", "caffe",
76+
DNN_TARGET_CPU, &net);
77+
TEST_CYCLE() net.forward();
78+
SANITY_CHECK_NOTHING();
9979
}
10080

10181
PERF_TEST(SqueezeNet_v1_1, HalidePerfTest)
10282
{
103-
try {
104-
Net net;
105-
loadNet("dnn/squeezenet_v1_1.caffemodel", "dnn/squeezenet_v1_1.prototxt",
106-
"dnn/halide_scheduler_squeezenet_v1_1.yml", 227, 227, "prob",
107-
"caffe", DNN_TARGET_CPU, &net);
108-
109-
TEST_CYCLE_N(10)
110-
{
111-
net.forward();
112-
}
113-
SANITY_CHECK_NOTHING();
114-
} catch (SkipTestException& e) {
115-
throw PerfSkipTestException();
116-
}
83+
Net net;
84+
loadNet("dnn/squeezenet_v1_1.caffemodel", "dnn/squeezenet_v1_1.prototxt",
85+
"dnn/halide_scheduler_squeezenet_v1_1.yml", 227, 227, "prob",
86+
"caffe", DNN_TARGET_CPU, &net);
87+
TEST_CYCLE() net.forward();
88+
SANITY_CHECK_NOTHING();
11789
}
11890

11991
PERF_TEST(Inception_5h, HalidePerfTest)
12092
{
121-
try {
122-
Net net;
123-
loadNet("dnn/tensorflow_inception_graph.pb", "",
124-
"dnn/halide_scheduler_inception_5h.yml",
125-
224, 224, "softmax2", "tensorflow", DNN_TARGET_CPU, &net);
126-
127-
TEST_CYCLE_N(10)
128-
{
129-
net.forward("softmax2");
130-
}
131-
SANITY_CHECK_NOTHING();
132-
} catch (SkipTestException& e) {
133-
throw PerfSkipTestException();
134-
}
93+
Net net;
94+
loadNet("dnn/tensorflow_inception_graph.pb", "",
95+
"dnn/halide_scheduler_inception_5h.yml",
96+
224, 224, "softmax2", "tensorflow", DNN_TARGET_CPU, &net);
97+
TEST_CYCLE() net.forward("softmax2");
98+
SANITY_CHECK_NOTHING();
13599
}
136100

137101
PERF_TEST(ENet, HalidePerfTest)
138102
{
139-
try {
140-
Net net;
141-
loadNet("dnn/Enet-model-best.net", "", "dnn/halide_scheduler_enet.yml",
142-
512, 256, "l367_Deconvolution", "torch", DNN_TARGET_CPU, &net);
143-
144-
TEST_CYCLE_N(10)
145-
{
146-
net.forward("l367_Deconvolution");
147-
}
148-
SANITY_CHECK_NOTHING();
149-
} catch (SkipTestException& e) {
150-
throw PerfSkipTestException();
151-
}
103+
Net net;
104+
loadNet("dnn/Enet-model-best.net", "", "dnn/halide_scheduler_enet.yml",
105+
512, 256, "l367_Deconvolution", "torch", DNN_TARGET_CPU, &net);
106+
TEST_CYCLE() net.forward();
107+
SANITY_CHECK_NOTHING();
108+
}
109+
////////////////////////////////////////////////////////////////////////////////
110+
// OpenCL target
111+
////////////////////////////////////////////////////////////////////////////////
112+
PERF_TEST(GoogLeNet_opencl, HalidePerfTest)
113+
{
114+
Net net;
115+
loadNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
116+
"", 227, 227, "prob", "caffe", DNN_TARGET_OPENCL, &net);
117+
TEST_CYCLE() net.forward();
118+
SANITY_CHECK_NOTHING();
119+
}
120+
121+
PERF_TEST(AlexNet_opencl, HalidePerfTest)
122+
{
123+
Net net;
124+
loadNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt",
125+
"dnn/halide_scheduler_opencl_alexnet.yml", 227, 227, "prob", "caffe",
126+
DNN_TARGET_OPENCL, &net);
127+
TEST_CYCLE() net.forward();
128+
SANITY_CHECK_NOTHING();
129+
}
130+
131+
PERF_TEST(ResNet50_opencl, HalidePerfTest)
132+
{
133+
Net net;
134+
loadNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt",
135+
"dnn/halide_scheduler_opencl_resnet_50.yml", 224, 224, "prob", "caffe",
136+
DNN_TARGET_OPENCL, &net);
137+
TEST_CYCLE() net.forward();
138+
SANITY_CHECK_NOTHING();
139+
}
140+
141+
142+
PERF_TEST(SqueezeNet_v1_1_opencl, HalidePerfTest)
143+
{
144+
Net net;
145+
loadNet("dnn/squeezenet_v1_1.caffemodel", "dnn/squeezenet_v1_1.prototxt",
146+
"dnn/halide_scheduler_opencl_squeezenet_v1_1.yml", 227, 227, "prob",
147+
"caffe", DNN_TARGET_OPENCL, &net);
148+
TEST_CYCLE() net.forward();
149+
SANITY_CHECK_NOTHING();
150+
}
151+
152+
PERF_TEST(Inception_5h_opencl, HalidePerfTest)
153+
{
154+
Net net;
155+
loadNet("dnn/tensorflow_inception_graph.pb", "",
156+
"dnn/halide_scheduler_opencl_inception_5h.yml",
157+
224, 224, "softmax2", "tensorflow", DNN_TARGET_OPENCL, &net);
158+
TEST_CYCLE() net.forward("softmax2");
159+
SANITY_CHECK_NOTHING();
160+
}
161+
162+
PERF_TEST(ENet_opencl, HalidePerfTest)
163+
{
164+
Net net;
165+
loadNet("dnn/Enet-model-best.net", "", "dnn/halide_scheduler_opencl_enet.yml",
166+
512, 256, "l367_Deconvolution", "torch", DNN_TARGET_OPENCL, &net);
167+
TEST_CYCLE() net.forward();
168+
SANITY_CHECK_NOTHING();
152169
}
153170
#endif // HAVE_HALIDE
154171

0 commit comments

Comments
 (0)