Merge pull request #1236 from arrybn:crop_mean

vpisarev · vpisarev · commit 8ef2f71799d8 · 2017-06-21T13:12:58.000Z
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -598,9 +598,37 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
      *  @warning This function has the same limitations as createTorchImporter().
      */
     CV_EXPORTS_W Mat readTorchBlob(const String &filename, bool isBinary = true);
-
-    CV_EXPORTS Mat blobFromImage(const Mat& image, double scalefactor=1.0, bool swapRB=true);
-    CV_EXPORTS Mat blobFromImages(const std::vector<Mat>& image, double scalefactor=1.0, bool swapRB=true);
+    /** @brief Creates 4-dimensional blob from image. Optionally resizes and crops @p image from center,
+     *  subtracts @p mean values, scales values by @p scalefactor, swaps Blue and Red channels.
+     *  @param image input image (with 1- or 3-channels).
+     *  @param size spatial size for output image
+     *  @param mean scalar with mean values which are subtracted from channels. Values are intended
+     *  to be in (mean-R, mean-G, mean-B) order if @p image has BGR ordering and @p swapRB is true.
+     *  @param scalefactor multiplier for @p image values.
+     *  @param swapRB flag which indicates that swapping the first and last channels
+     *  of a 3-channel image is necessary.
+     *  @details the input image is resized so that one side after resize is equal to the corresponding
+     *  dimension in @p size and the other one is equal or larger. Then, a crop from the center is performed.
+     *  @returns 4-dimensional Mat with NCHW dimensions order.
+     */
+    CV_EXPORTS_W Mat blobFromImage(const Mat& image, double scalefactor=1.0, const Size& size = Size(),
+                                   const Scalar& mean = Scalar(), bool swapRB=true);
+    /** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
+     *  crops @p images from center, subtracts @p mean values, scales values by @p scalefactor,
+     *  swaps Blue and Red channels.
+     *  @param images input images (all with 1- or 3-channels).
+     *  @param size spatial size for output image
+     *  @param mean scalar with mean values which are subtracted from channels. Values are intended
+     *  to be in (mean-R, mean-G, mean-B) order if @p images have BGR ordering and @p swapRB is true.
+     *  @param scalefactor multiplier for @p images values.
+     *  @param swapRB flag which indicates that swapping the first and last channels
+     *  of a 3-channel image is necessary.
+     *  @details each input image is resized so that one side after resize is equal to the corresponding
+     *  dimension in @p size and the other one is equal or larger. Then, a crop from the center is performed.
+     *  @returns 4-dimensional Mat with NCHW dimensions order.
+     */
+    CV_EXPORTS_W Mat blobFromImages(const std::vector<Mat>& images, double scalefactor=1.0,
+                                    Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=true);
 
 //! @}
 }
diff --git a/modules/dnn/samples/caffe_googlenet.cpp b/modules/dnn/samples/caffe_googlenet.cpp
@@ -114,8 +114,9 @@ int main(int argc, char **argv)
         exit(-1);
     }
 
-    resize(img, img, Size(224, 224));                   //GoogLeNet accepts only 224x224 RGB-images
-    Mat inputBlob = blobFromImage(img);   //Convert Mat to batch of images
+    //GoogLeNet accepts only 224x224 RGB-images
+    Mat inputBlob = blobFromImage(img, 1, Size(224, 224),
+                                  Scalar(104, 117, 123));   //Convert Mat to batch of images
     //! [Prepare blob]
 
     //! [Set input blob]
diff --git a/modules/dnn/samples/squeezenet_halide.cpp b/modules/dnn/samples/squeezenet_halide.cpp
@@ -89,7 +89,7 @@ int main(int argc, char **argv)
     }
 
     resize(img, img, Size(227, 227));                // SqueezeNet v1.1 predict class by 3x227x227 input image.
-    Mat inputBlob = blobFromImage(img, 1.0, false);  // Convert Mat to 4-dimensional batch.
+    Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(), false);  // Convert Mat to 4-dimensional batch.
     //! [Prepare blob]
 
     //! [Set input blob]
diff --git a/modules/dnn/samples/torch_enet.cpp b/modules/dnn/samples/torch_enet.cpp
@@ -69,7 +69,7 @@ int main(int argc, char **argv)
     if (inputImgSize != origSize)
         resize(img, img, inputImgSize);       //Resize image to input size
 
-    Mat inputBlob = blobFromImage(img, 1./255, true);   //Convert Mat to image batch
+    Mat inputBlob = blobFromImage(img, 1./255);   //Convert Mat to image batch
     //! [Prepare blob]
 
     //! [Set input blob]
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
@@ -48,6 +48,7 @@
 #include <sstream>
 #include <iterator>
 #include <opencv2/dnn/shape_utils.hpp>
+#include <opencv2/imgproc.hpp>
 
 using namespace cv;
 using namespace cv::dnn;
@@ -86,14 +87,42 @@ static String toString(const T &v)
     return ss.str();
 }
 
-Mat blobFromImage(const Mat& image_, double scalefactor, bool swapRB)
+Mat blobFromImage(const Mat& image, double scalefactor, const Size& size,
+                  const Scalar& mean, bool swapRB)
 {
-    std::vector<Mat> images(1, image_);
-    return blobFromImages(images, scalefactor, swapRB);
+    std::vector<Mat> images(1, image);
+    return blobFromImages(images, scalefactor, size, mean, swapRB);
 }
 
-Mat blobFromImages(const std::vector<Mat>& images, double scalefactor, bool swapRB)
+Mat blobFromImages(const std::vector<Mat>& images_, double scalefactor, Size size,
+                   const Scalar& mean_, bool swapRB)
 {
+    std::vector<Mat> images = images_;
+    for (int i = 0; i < images.size(); i++)
+    {
+        Size imgSize = images[i].size();
+        if (size == Size())
+            size = imgSize;
+        if (size != imgSize)
+        {
+            float resizeFactor = std::max(size.width / (float)imgSize.width,
+                                          size.height / (float)imgSize.height);
+            resize(images[i], images[i], Size(), resizeFactor, resizeFactor);
+            Rect crop(Point(0.5 * (images[i].cols - size.width),
+                            0.5 * (images[i].rows - size.height)),
+                      size);
+            images[i] = images[i](crop);
+        }
+        if(images[i].depth() == CV_8U)
+            images[i].convertTo(images[i], CV_32F);
+        Scalar mean = mean_;
+        if (swapRB)
+            std::swap(mean[0], mean[2]);
+
+        images[i] -= mean;
+        images[i] *= scalefactor;
+    }
+
     size_t i, nimages = images.size();
     if(nimages == 0)
         return Mat();
@@ -109,13 +138,7 @@ Mat blobFromImages(const std::vector<Mat>& images, double scalefactor, bool swap
 
         for( i = 0; i < nimages; i++ )
         {
-            Mat image_ = images[i];
-            if(image_.depth() == CV_8U)
-            {
-                image_.convertTo(image, CV_32F, scalefactor);
-            }
-            else
-                image = image_;
+            image = images[i];
             CV_Assert(image.depth() == CV_32F);
             nch = image.channels();
             CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
@@ -136,13 +159,7 @@ Mat blobFromImages(const std::vector<Mat>& images, double scalefactor, bool swap
 
        for( i = 0; i < nimages; i++ )
        {
-           Mat image_ = images[i];
-           if(image_.depth() == CV_8U)
-           {
-               image_.convertTo(image, CV_32F, scalefactor);
-           }
-           else
-               image = image_;
+           Mat image = images[i];
            CV_Assert(image.depth() == CV_32F);
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 1));
@@ -154,7 +171,6 @@ Mat blobFromImages(const std::vector<Mat>& images, double scalefactor, bool swap
     return blob;
 }
 
-
 struct LayerPin
 {
     int lid;
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
@@ -94,7 +94,7 @@ TEST(Reproducibility_AlexNet, Accuracy)
     if (sample.size() != inputSize)
         resize(sample, sample, inputSize);
 
-    net.setInput(blobFromImage(sample, 1.), "data");
+    net.setInput(blobFromImage(sample), "data");
     Mat out = net.forward("prob");
     Mat ref = blobFromNPY(_tf("caffe_alexnet_prob.npy"));
     normAssert(ref, out);
@@ -123,7 +123,7 @@ TEST(Reproducibility_FCN, Accuracy)
     std::vector<size_t> weights, blobs;
     net.getMemoryConsumption(shape(1,3,227,227), layerIds, weights, blobs);
 
-    net.setInput(blobFromImage(sample, 1.), "data");
+    net.setInput(blobFromImage(sample), "data");
     Mat out = net.forward("score");
     Mat ref = blobFromNPY(_tf("caffe_fcn8s_prob.npy"));
     normAssert(ref, out);
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
@@ -40,7 +40,7 @@ TEST(Test_TensorFlow, read_inception)
     resize(sample, input, Size(224, 224));
     input -= 128; // mean sub
 
-    Mat inputBlob = blobFromImage(input, 1.);
+    Mat inputBlob = blobFromImage(input);
 
     net.setInput(inputBlob, "input");
     Mat out = net.forward("softmax2");
@@ -61,7 +61,7 @@ TEST(Test_TensorFlow, inception_accuracy)
     Mat sample = imread(_tf("grace_hopper_227.png"));
     ASSERT_TRUE(!sample.empty());
     resize(sample, sample, Size(224, 224));
-    Mat inputBlob = blobFromImage(sample, 1.);
+    Mat inputBlob = blobFromImage(sample);
 
     net.setInput(inputBlob, "input");
     Mat out = net.forward("softmax2");

Original file line number	Diff line number	Diff line change
`@@ -89,7 +89,7 @@ int main(int argc, char **argv)`
`89`	`89`	`}`
`90`	`90`
`91`	`91`	`resize(img, img, Size(227, 227)); // SqueezeNet v1.1 predict class by 3x227x227 input image.`
`92`		`- Mat inputBlob = blobFromImage(img, 1.0, false); // Convert Mat to 4-dimensional batch.`
	`92`	`+ Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(), false); // Convert Mat to 4-dimensional batch.`
`93`	`93`	`//! [Prepare blob]`
`94`	`94`
`95`	`95`	`//! [Set input blob]`