Merge pull request #528 from lluisgomez:master

vpisarev · vpisarev · commit f073c003ebfa · 2016-03-22T14:01:20.000Z
diff --git a/modules/text/include/opencv2/text/erfilter.hpp b/modules/text/include/opencv2/text/erfilter.hpp
@@ -115,7 +115,7 @@ struct CV_EXPORTS ERStat
 
 Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier.
  */
-class CV_EXPORTS ERFilter : public Algorithm
+class CV_EXPORTS_W ERFilter : public Algorithm
 {
 public:
 
@@ -124,7 +124,7 @@ class CV_EXPORTS ERFilter : public Algorithm
     By doing it we hide SVM, Boost etc. Developers can provide their own classifiers to the
     ERFilter algorithm.
      */
-    class CV_EXPORTS Callback
+    class CV_EXPORTS_W Callback
     {
     public:
         virtual ~Callback() { }
@@ -207,11 +207,11 @@ the probability P(er|character) are selected (if the local maximum of the probab
 global limit pmin and the difference between local maximum and local minimum is greater than
 minProbabilityDiff).
  */
-CV_EXPORTS Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb,
-                                                  int thresholdDelta = 1, float minArea = 0.00025,
-                                                  float maxArea = 0.13, float minProbability = 0.4,
+CV_EXPORTS_W Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb,
+                                                  int thresholdDelta = 1, float minArea = (float)0.00025,
+                                                  float maxArea = (float)0.13, float minProbability = (float)0.4,
                                                   bool nonMaxSuppression = true,
-                                                  float minProbabilityDiff = 0.1);
+                                                  float minProbabilityDiff = (float)0.1);
 
 /** @brief Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12].
 
@@ -224,8 +224,8 @@ non-character classes using more informative but also more computationally expen
 classifier uses all the features calculated in the first stage and the following additional
 features: hole area ratio, convex hull ratio, and number of outer inflexion points.
  */
-CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
-                                                  float minProbability = 0.3);
+CV_EXPORTS_W Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
+                                                  float minProbability = (float)0.3);
 
 
 /** @brief Allow to implicitly load the default classifier when creating an ERFilter object.
@@ -234,15 +234,15 @@ CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
 
 returns a pointer to ERFilter::Callback.
  */
-CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM1(const std::string& filename);
+CV_EXPORTS_W Ptr<ERFilter::Callback> loadClassifierNM1(const String& filename);
 
 /** @brief Allow to implicitly load the default classifier when creating an ERFilter object.
 
 @param filename The XML or YAML file with the classifier model (e.g. trained_classifierNM2.xml)
 
 returns a pointer to ERFilter::Callback.
  */
-CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM2(const std::string& filename);
+CV_EXPORTS_W Ptr<ERFilter::Callback> loadClassifierNM2(const String& filename);
 
 
 //! computeNMChannels operation modes
@@ -264,7 +264,7 @@ channels (Grad) are used in order to obtain high localization recall. This imple
 provides an alternative combination of red (R), green (G), blue (B), lightness (L), and gradient
 magnitude (Grad).
  */
-CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
+CV_EXPORTS_W void computeNMChannels(InputArray _src, CV_OUT OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
 
 
 
@@ -324,6 +324,13 @@ CV_EXPORTS void erGrouping(InputArray img, InputArrayOfArrays channels,
                                            const std::string& filename = std::string(),
                                            float minProbablity = 0.5);
 
+CV_EXPORTS_W void erGrouping(InputArray image, InputArray channel,
+                                           std::vector<std::vector<Point> > regions,
+                                           CV_OUT std::vector<Rect> &groups_rects,
+                                           int method = ERGROUPING_ORIENTATION_HORIZ,
+                                           const String& filename = String(),
+                                           float minProbablity = (float)0.5);
+
 /** @brief Converts MSER contours (vector\<Point\>) to ERStat regions.
 
 @param image Source image CV_8UC1 from which the MSERs where extracted.
@@ -343,6 +350,9 @@ An example of MSERsToERStats in use can be found in the text detection webcam_de
 CV_EXPORTS void MSERsToERStats(InputArray image, std::vector<std::vector<Point> > &contours,
                                std::vector<std::vector<ERStat> > &regions);
 
+// Utility function for scripting
+CV_EXPORTS_W void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2, CV_OUT std::vector< std::vector<Point> >& regions);
+
 //! @}
 
 }
diff --git a/modules/text/samples/detect_er_chars.py b/modules/text/samples/detect_er_chars.py
@@ -0,0 +1,39 @@
+#!/usr/bin/python
+
+import sys
+import os
+
+import cv2
+import numpy as np
+from matplotlib import pyplot as plt
+
+print('\ndetect_er_chars.py')
+print('       A simple demo script using the Extremal Region Filter algorithm described in:')
+print('       Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012\n')
+
+if (len(sys.argv) < 2):
+  print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
+  sys.exit(1)
+
+pathname = os.path.dirname(os.path.abspath(sys.argv[0]))  # absolute: dirname() alone is '' for "python detect_er_chars.py"
+
+img  = cv2.imread(str(sys.argv[1]))
+gray = cv2.imread(str(sys.argv[1]),0)
+if img is None or gray is None:  # imread signals failure by returning None instead of raising
+  sys.exit(' (ERROR) Could not read image: ' + str(sys.argv[1]))
+
+erc1 = cv2.text.loadClassifierNM1(os.path.join(pathname, 'trained_classifierNM1.xml'))
+er1 = cv2.text.createERFilterNM1(erc1)
+erc2 = cv2.text.loadClassifierNM2(os.path.join(pathname, 'trained_classifierNM2.xml'))
+er2 = cv2.text.createERFilterNM2(erc2)
+
+regions = cv2.text.detectRegions(gray,er1,er2)
+
+#Visualization
+rects = [cv2.boundingRect(p.reshape(-1, 1, 2)) for p in regions]
+for rect in rects:
+  cv2.rectangle(img, rect[0:2], (rect[0]+rect[2],rect[1]+rect[3]), (0, 0, 255), 2)
+img = img[:,:,::-1] #flip the colors dimension from BGR to RGB
+plt.imshow(img)
+plt.xticks([]), plt.yticks([])  # to hide tick values on X and Y axis
+plt.show()
diff --git a/modules/text/samples/textdetection.py b/modules/text/samples/textdetection.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python
+
+import sys
+import os
+
+import cv2
+import numpy as np
+from matplotlib import pyplot as plt
+
+print('\ntextdetection.py')
+print('       A demo script of the Extremal Region Filter algorithm described in:')
+print('       Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012\n')
+
+if (len(sys.argv) < 2):
+  print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
+  sys.exit(1)
+
+# Absolute script directory: dirname(argv[0]) alone is '' when run as "python textdetection.py",
+# which made the classifier paths below resolve to the filesystem root ("/trained_...").
+pathname = os.path.dirname(os.path.abspath(sys.argv[0]))
+
+img      = cv2.imread(str(sys.argv[1]))
+if img is None:  # imread signals failure by returning None instead of raising
+  sys.exit(' (ERROR) Could not read image: ' + str(sys.argv[1]))
+# for visualization
+vis      = img.copy()
+
+# Extract channels to be processed individually (list() so that .append works on any sequence type)
+channels = list(cv2.text.computeNMChannels(img))
+# Append negative channels to detect ER- (bright regions over dark background);
+# the last channel is left out (presumably the gradient magnitude one -- TODO confirm)
+cn = len(channels)-1
+for c in range(0,cn):
+  channels.append((255-channels[c]))
+
+# Apply the default cascade classifier to each independent channel (could be done in parallel)
+print("Extracting Class Specific Extremal Regions from "+str(len(channels))+" channels ...")
+print("    (...) this may take a while (...)")
+for channel in channels:
+
+  erc1 = cv2.text.loadClassifierNM1(os.path.join(pathname, 'trained_classifierNM1.xml'))
+  er1 = cv2.text.createERFilterNM1(erc1,16,0.00015,0.13,0.2,True,0.1)
+
+  erc2 = cv2.text.loadClassifierNM2(os.path.join(pathname, 'trained_classifierNM2.xml'))
+  er2 = cv2.text.createERFilterNM2(erc2,0.5)
+
+  regions = cv2.text.detectRegions(channel,er1,er2)
+
+  rects = cv2.text.erGrouping(img,channel,[r.tolist() for r in regions])
+  #rects = cv2.text.erGrouping(img,gray,[x.tolist() for x in regions], cv2.text.ERGROUPING_ORIENTATION_ANY,'../../GSoC2014/opencv_contrib/modules/text/samples/trained_classifier_erGrouping.xml',0.5)
+
+  #Visualization: one yellow box per detected text group, drawn as (x, y, w, h)
+  for rect in rects:
+    cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (0, 255, 255), 2)
+
+#Visualization
+vis = vis[:,:,::-1] #flip the colors dimension from BGR to RGB
+plt.imshow(vis)
+plt.xticks([]), plt.yticks([])  # to hide tick values on X and Y axis
+plt.show()
diff --git a/modules/text/src/erfilter.cpp b/modules/text/src/erfilter.cpp
@@ -1161,7 +1161,7 @@ Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb, float minProb
     The function takes as parameter the XML or YAML file with the classifier model
     (e.g. trained_classifierNM1.xml) returns a pointer to ERFilter::Callback.
 */
-Ptr<ERFilter::Callback> loadClassifierNM1(const string& filename)
+Ptr<ERFilter::Callback> loadClassifierNM1(const String& filename)
 
 {
     return makePtr<ERClassifierNM1>(filename);
@@ -1172,7 +1172,7 @@ Ptr<ERFilter::Callback> loadClassifierNM1(const string& filename)
     The function takes as parameter the XML or YAML file with the classifier model
     (e.g. trained_classifierNM2.xml) returns a pointer to ERFilter::Callback.
 */
-Ptr<ERFilter::Callback> loadClassifierNM2(const string& filename)
+Ptr<ERFilter::Callback> loadClassifierNM2(const String& filename)
 {
     return makePtr<ERClassifierNM2>(filename);
 }
@@ -1236,7 +1236,7 @@ void get_gradient_magnitude(Mat& _grey_img, Mat& _gradient_magnitude)
                            ERFILTER_NM_RGBLGrad and ERFILTER_NM_IHSGrad.
 
 */
-void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode)
+void computeNMChannels(InputArray _src, CV_OUT OutputArrayOfArrays _channels, int _mode)
 {
 
     CV_Assert( ( _mode == ERFILTER_NM_RGBLGrad ) || ( _mode == ERFILTER_NM_IHSGrad ) );
@@ -4094,6 +4094,22 @@ void erGrouping(InputArray image, InputArrayOfArrays channels, vector<vector<ERS
 
 }
 
+void erGrouping(InputArray image, InputArray channel, vector<vector<Point> > contours, CV_OUT std::vector<Rect> &groups_rects, int method, const String& filename, float minProbability)
+{
+    // Single-channel convenience overload: takes plain contours and returns only the group rectangles.
+    CV_Assert( image.getMat().type() == CV_8UC3 );
+    CV_Assert( channel.getMat().type() == CV_8UC1 );
+    CV_Assert( !((method == ERGROUPING_ORIENTATION_ANY) && (filename.empty())) );
+
+    // Turn the contours into ERStat regions, then discard the last vector MSERsToERStats produced.
+    vector<vector<ERStat> > er_regions;
+    MSERsToERStats(channel, contours, er_regions);
+    er_regions.pop_back();
+    vector<Mat> single_channel(1, channel.getMat());
+    std::vector<std::vector<Vec2i> > point_groups; // filled by the call below, not returned to the caller
+    erGrouping(image, single_channel, er_regions, point_groups, groups_rects, method, filename, minProbability);
+}
+
 /*!
  * MSERsToERStats function converts MSER contours (vector<Point>) to ERStat regions.
  * It takes as input the contours provided by the OpenCV MSER feature detector and returns as output two vectors
@@ -4167,5 +4183,55 @@ void MSERsToERStats(InputArray image, vector<vector<Point> > &contours, vector<v
   }
 }
 
+// Utility function for scripting
+void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2, CV_OUT vector< vector<Point> >& regions)
+{
+    // Runs er_filter1 (required) and then er_filter2 (optional) over a CV_8UC1 image and appends
+    // one contour per surviving ER (root excluded) to `regions`, in full-image coordinates.
+    CV_Assert( image.getMat().type() == CV_8UC1 );
+    CV_Assert( !er_filter1.empty() );
+
+    vector<ERStat> ers;
+    er_filter1->run(image, ers);
+    if (!er_filter2.empty())
+      er_filter2->run(image, ers);
+
+    Mat src = image.getMat();
+    // Scratch mask reused for every region; floodFill needs it 2 px wider and taller than the image.
+    Mat region_mask = Mat::zeros(src.rows+2, src.cols+2, CV_8UC1);
+    const int newMaskVal = 255;
+    const int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
+
+    for (size_t i=1; i < ers.size(); i++) //start from 1 to skip the root region
+    {
+      const ERStat& stat = ers[i];
+      const Rect& er_rect = stat.rect;
+
+      //Clear the part of the mask covering this ER, then flood it from the ER's seed pixel
+      Mat region = region_mask(Rect(er_rect.x, er_rect.y, er_rect.width+2, er_rect.height+2));
+      region = Scalar(0);
+      const Point seed(stat.pixel%src.cols - er_rect.x, stat.pixel/src.cols - er_rect.y);
+      Rect rect;
+      floodFill( src(er_rect), region, seed, Scalar(255), &rect,
+                 Scalar(stat.level), Scalar(0), flags );
+      rect.width += 2;
+      rect.height += 2;
+      region = region(rect);
+
+      vector<vector<Point> > contours;
+      vector<Vec4i> hierarchy;
+      findContours( region, contours, hierarchy, RETR_TREE, CHAIN_APPROX_NONE, Point(0, 0) );
+      if (contours.empty())
+        continue; // never index contours[0] when nothing was traced
+
+      //Shift the contour from mask coordinates (1 px border) to full-image coordinates
+      const Point offset = er_rect.tl() - Point(1,1);
+      for (size_t j=0; j < contours[0].size(); j++)
+        contours[0][j] += offset;
+
+      regions.push_back(contours[0]);
+    }
+}
+
 }
 }