Skip to content

Commit 9c6c69a

Browse files
arrybnvpisarev
authored and committed
Fixed and added tests (#987)
1 parent ee79c1f commit 9c6c69a

16 files changed

+192
-108
lines changed

modules/dnn/CMakeLists.txt

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,20 @@ ocv_add_perf_tests()
4848
# ----------------------------------------------------------------------------
4949
# Download pre-trained models for complex testing on GoogLeNet and AlexNet
5050
# ----------------------------------------------------------------------------
51-
OCV_OPTION(${the_module}_DOWNLOAD_CAFFE_MODELS "Use GoogLeNet Caffe model for testing" OFF IF BUILD_TESTS AND DEFINED ENV{OPENCV_TEST_DATA_PATH})
52-
if(BUILD_TESTS AND DEFINED ENV{OPENCV_TEST_DATA_PATH} AND (DOWNLOAD_EXTERNAL_TEST_DATA OR ${the_module}_DOWNLOAD_CAFFE_MODELS))
51+
OCV_OPTION(${the_module}_DOWNLOAD_MODELS "Use GoogLeNet Caffe model for testing" OFF IF BUILD_TESTS AND DEFINED ENV{OPENCV_TEST_DATA_PATH})
52+
if(BUILD_TESTS AND DEFINED ENV{OPENCV_TEST_DATA_PATH} AND (DOWNLOAD_EXTERNAL_TEST_DATA OR ${the_module}_DOWNLOAD_MODELS))
5353
add_custom_command( TARGET opencv_test_${name} POST_BUILD
5454
COMMAND ${CMAKE_COMMAND} -Dmodel=GoogleNet -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/download_model.cmake)
55+
add_custom_command( TARGET opencv_test_${name} POST_BUILD
56+
COMMAND ${CMAKE_COMMAND} -Dmodel=Alexnet -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/download_model.cmake)
57+
add_custom_command( TARGET opencv_test_${name} POST_BUILD
58+
COMMAND ${CMAKE_COMMAND} -Dmodel=Inception -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/download_model.cmake)
59+
add_custom_command( TARGET opencv_test_${name} POST_BUILD
60+
COMMAND ${CMAKE_COMMAND} -Dmodel=Enet -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/download_model.cmake)
5561
add_definitions(-DENABLE_CAFFE_MODEL_TESTS=1)
62+
add_definitions(-DENABLE_CAFFE_ALEXNET_TEST=1)
63+
add_definitions(-DENABLE_TF_INCEPTION_TESTS=1)
64+
add_definitions(-DENABLE_TORCH_ENET_TESTS=1)
5665
endif()
5766

5867
# ----------------------------------------------------------------------------

modules/dnn/cmake/download_model.cmake

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,19 @@ set(GG16_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/VGG_ILSVRC_16_layers.caffemodel")
88
set(voc-fcn32s_url "http://dl.caffe.berkeleyvision.org/fcn32s-heavy-pascal.caffemodel")
99
set(voc-fcn32s_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/fcn32s-heavy-pascal.caffemodel")
1010

11+
set(Alexnet_url "http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel")
12+
set(Alexnet_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/bvlc_alexnet.caffemodel")
13+
set(Alexnet_sha "9116a64c0fbe4459d18f4bb6b56d647b63920377")
14+
15+
set(Inception_url "https://github.com/petewarden/tf_ios_makefile_example/raw/master/data/tensorflow_inception_graph.pb")
16+
set(Inception_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/tensorflow_inception_graph.pb")
17+
18+
set(Enet_url "https://www.dropbox.com/sh/dywzk3gyb12hpe5/AABoUwqQGWvClUu27Z1EWeu9a/model-best.net?dl=0")
19+
set(Enet_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/Enet-model-best.net")
20+
21+
set(Fcn_url "http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel")
22+
set(Fcn_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/fcn8s-heavy-pascal.caffemodel")
23+
1124
if(NOT model)
1225
set(model "GoogleNet")
1326
endif()

modules/dnn/samples/caffe_googlenet.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ int main(int argc, char **argv)
115115
}
116116

117117
resize(img, img, Size(224, 224)); //GoogLeNet accepts only 224x224 RGB-images
118+
cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
118119
dnn::Blob inputBlob = dnn::Blob::fromImages(img); //Convert Mat to dnn::Blob batch of images
119120
//! [Prepare blob]
120121

modules/dnn/src/caffe/caffe.proto

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ message LayerParameter {
425425

426426
// The blobs containing the numeric parameters of the layer.
427427
repeated BlobProto blobs = 7;
428-
428+
429429
// Specifies on which bottoms the backpropagation should be skipped.
430430
// The size must be either 0 or equal to the number of bottoms.
431431
repeated bool propagate_down = 11;
@@ -571,7 +571,7 @@ message ContrastiveLossParameter {
571571
// Hadsell paper. New models should probably use this version.
572572
// legacy_version = true uses (margin - d^2). This is kept to support /
573573
// reproduce existing models and results
574-
optional bool legacy_version = 2 [default = false];
574+
optional bool legacy_version = 2 [default = false];
575575
}
576576

577577
message ConvolutionParameter {

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ void BaseConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::v
111111
if (!is1x1())
112112
{
113113
colRowBlob.create(colRowBlobShape, input.type(), allocFlags);
114+
colRowBlob.setTo(0);
114115
}
115116
}
116117

@@ -250,11 +251,11 @@ void ConvolutionLayerImpl::im2row(const Mat &srcImg, Mat &dstRow)
250251
if (srcImg.type() == CV_32F)
251252
im2row_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height,
252253
kernel.width, pad.height, pad.width, stride.height, stride.width,
253-
dilation.height, dilation.width, outW, outH, colMat.ptr<float>());
254+
dilation.height, dilation.width, outH, outW, colMat.ptr<float>());
254255
if (srcImg.type() == CV_64F)
255256
im2row_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height,
256257
kernel.width, pad.height, pad.width, stride.height, stride.width,
257-
dilation.height, dilation.width, outW, outH, colMat.ptr<double>());
258+
dilation.height, dilation.width, outH, outW, colMat.ptr<double>());
258259

259260
dstRow = colMat;
260261
}
@@ -268,11 +269,9 @@ void ConvolutionLayerImpl::im2row(const UMat &srcImg, UMat &dstCol)
268269

269270
void DeConvolutionLayerImpl::computeInpOutShape(const Blob &inpBlob)
270271
{
271-
BlobShape bs0 = blobs[0].shape();
272-
BlobShape bs1 = blobs[1].shape();
273-
CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].channels());
272+
CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num());
274273

275-
numOutput = blobs[0].channels();
274+
numOutput = blobs[0].num();
276275

277276
inpH = inpBlob.rows();
278277
inpW = inpBlob.cols();
@@ -282,13 +281,13 @@ void DeConvolutionLayerImpl::computeInpOutShape(const Blob &inpBlob)
282281
outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
283282
outCn = numOutput;
284283

285-
group = inpCn / blobs[0].num();
284+
group = inpCn / blobs[0].channels();
286285
outGroupCn = outCn / group;
287286
inpGroupCn = inpCn / group;
288287
ksize = outGroupCn * kernel.height * kernel.width;
289288

290289
CV_Assert(inpCn % group == 0 && outCn % group == 0);
291-
CV_Assert(blobs[0].channels() == outCn && blobs[0].num() == inpCn / group);
290+
CV_Assert(blobs[0].num() == outCn && blobs[0].channels() == inpCn / group);
292291

293292
colRowBlobShape = BlobShape(ksize, inpH * inpW);
294293
}

modules/dnn/src/layers/lrn_layer.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,23 +106,23 @@ static XMat getPlane(XMat &m, int n, int cn)
106106
void LRNLayerImpl::channelNoramlization(Blob &src, Blob &dst)
107107
{
108108
if (!useOpenCL)
109-
channelNoramlization_<Mat>(src, dst);
109+
channelNormalization_<Mat>(src, dst);
110110
else
111111
{
112112
//channelNoramlization_ocl(src.getRefConst<UMat>(), dst.getRef<UMat>()); //consumes a lot of memory
113-
channelNoramlization_<UMat>(src, dst);
113+
channelNormalization_<UMat>(src, dst);
114114
}
115115
}
116116

117117
template<typename XMat>
118-
void LRNLayerImpl::channelNoramlization_(Blob &srcBlob, Blob &dstBlob)
118+
void LRNLayerImpl::channelNormalization_(Blob &srcBlob, Blob &dstBlob)
119119
{
120120
int num = srcBlob.num();
121121
int channels = srcBlob.channels();
122122
int ksize = (size - 1) / 2;
123123
int sizeNormFactor = normBySize ? size : 1;
124124

125-
XMat srcMat = srcBlob.getRefConst<XMat>();
125+
XMat srcMat = srcBlob.getRefConst<XMat>().clone();
126126
XMat dstMat = dstBlob.getRef<XMat>();
127127

128128
for (int n = 0; n < num; n++)
@@ -156,7 +156,7 @@ void LRNLayerImpl::channelNoramlization_(Blob &srcBlob, Blob &dstBlob)
156156
}
157157
}
158158

159-
bool LRNLayerImpl::channelNoramlization_ocl(const UMat &src, UMat &dst)
159+
bool LRNLayerImpl::channelNormalization_ocl(const UMat &src, UMat &dst)
160160
{
161161
#ifdef HAVE_OPENCL
162162
if (src.offset != 0 || dst.offset != 0) //TODO: add offset

modules/dnn/src/layers/lrn_layer.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ class LRNLayerImpl : public LRNLayer
5656

5757
void channelNoramlization(Blob &src, Blob &dst);
5858
template<typename XMat>
59-
void channelNoramlization_(Blob &src, Blob &dst);
60-
bool channelNoramlization_ocl(const UMat &src, UMat &dst);
59+
void channelNormalization_(Blob &src, Blob &dst);
60+
bool channelNormalization_ocl(const UMat &src, UMat &dst);
6161

6262
void spatialNormalization(Blob &src, Blob &dst);
6363
template<typename XMat>
@@ -67,8 +67,8 @@ class LRNLayerImpl : public LRNLayer
6767

6868
public:
6969

70-
LRNLayerImpl(int type = CHANNEL_NRM, int size = 5, double alpha = 1, double beta = 0.75, double bias = 1,
71-
bool normBySize = true);
70+
LRNLayerImpl(int type = CHANNEL_NRM, int size = 5, double alpha = 1,
71+
double beta = 0.75, double bias = 1, bool normBySize = true);
7272
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
7373
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
7474
};

modules/dnn/src/layers/max_unpooling_layer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ void MaxUnpoolLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &
4444
for(int i_n = 0; i_n < outputs.size(); i_n++)
4545
{
4646
Blob& outBlob = outputs[i_n];
47+
outBlob.setTo(0);
4748
CV_Assert(input.channels() == outBlob.channels());
4849

4950
for (int i_c = 0; i_c < input.channels(); i_c++)

modules/dnn/src/layers/reshape_layer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ void ReshapeLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &ou
7373
{
7474
for (size_t i = 0; i < outputs.size(); i++)
7575
{
76-
Blob& srcBlob = *inputs[i];
76+
Blob srcBlob = *inputs[i];
7777
BlobShape inputShape = inputs[i]->shape();
7878
bool channelsReduced = inputShape.dims() > outShapes[i].dims() ||
7979
(inputShape.dims() == 4 && inputShape[1] > outShapes[i][1]);

modules/dnn/src/tensorflow/tf_importer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -640,7 +640,7 @@ void TFImporter::populateNet(Net dstNet)
640640
if(hasLayerAttr(layer, "bias")) {
641641
layerParams.set("bias", getLayerAttr(layer, "bias").f());
642642
}
643-
layerParams.set("norm_sz", false);
643+
layerParams.set("norm_by_size", false);
644644

645645
int id = dstNet.addLayer(name, "LRN", layerParams);
646646
layer_id[name] = id;

0 commit comments

Comments (0)