Skip to content

Commit f073c00

Browse files
committed
Merge pull request #528 from lluisgomez:master
2 parents 6cd8e9f + f07a00c commit f073c00

File tree

4 files changed

+189
-14
lines changed

4 files changed

+189
-14
lines changed

modules/text/include/opencv2/text/erfilter.hpp

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ struct CV_EXPORTS ERStat
115115
116116
Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier.
117117
*/
118-
class CV_EXPORTS ERFilter : public Algorithm
118+
class CV_EXPORTS_W ERFilter : public Algorithm
119119
{
120120
public:
121121

@@ -124,7 +124,7 @@ class CV_EXPORTS ERFilter : public Algorithm
124124
By doing it we hide SVM, Boost etc. Developers can provide their own classifiers to the
125125
ERFilter algorithm.
126126
*/
127-
class CV_EXPORTS Callback
127+
class CV_EXPORTS_W Callback
128128
{
129129
public:
130130
virtual ~Callback() { }
@@ -207,11 +207,11 @@ the probability P(er|character) are selected (if the local maximum of the probab
207207
global limit pmin and the difference between local maximum and local minimum is greater than
208208
minProbabilityDiff).
209209
*/
210-
CV_EXPORTS Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb,
211-
int thresholdDelta = 1, float minArea = 0.00025,
212-
float maxArea = 0.13, float minProbability = 0.4,
210+
CV_EXPORTS_W Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb,
211+
int thresholdDelta = 1, float minArea = (float)0.00025,
212+
float maxArea = (float)0.13, float minProbability = (float)0.4,
213213
bool nonMaxSuppression = true,
214-
float minProbabilityDiff = 0.1);
214+
float minProbabilityDiff = (float)0.1);
215215

216216
/** @brief Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12].
217217
@@ -224,8 +224,8 @@ non-character classes using more informative but also more computationally expen
224224
classifier uses all the features calculated in the first stage and the following additional
225225
features: hole area ratio, convex hull ratio, and number of outer inflexion points.
226226
*/
227-
CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
228-
float minProbability = 0.3);
227+
CV_EXPORTS_W Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
228+
float minProbability = (float)0.3);
229229

230230

231231
/** @brief Allow to implicitly load the default classifier when creating an ERFilter object.
@@ -234,15 +234,15 @@ CV_EXPORTS Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb,
234234
235235
returns a pointer to ERFilter::Callback.
236236
*/
237-
CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM1(const std::string& filename);
237+
CV_EXPORTS_W Ptr<ERFilter::Callback> loadClassifierNM1(const String& filename);
238238

239239
/** @brief Allow to implicitly load the default classifier when creating an ERFilter object.
240240
241241
@param filename The XML or YAML file with the classifier model (e.g. trained_classifierNM2.xml)
242242
243243
returns a pointer to ERFilter::Callback.
244244
*/
245-
CV_EXPORTS Ptr<ERFilter::Callback> loadClassifierNM2(const std::string& filename);
245+
CV_EXPORTS_W Ptr<ERFilter::Callback> loadClassifierNM2(const String& filename);
246246

247247

248248
//! computeNMChannels operation modes
@@ -264,7 +264,7 @@ channels (Grad) are used in order to obtain high localization recall. This imple
264264
provides an alternative combination of red (R), green (G), blue (B), lightness (L), and gradient
265265
magnitude (Grad).
266266
*/
267-
CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
267+
CV_EXPORTS_W void computeNMChannels(InputArray _src, CV_OUT OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
268268

269269

270270

@@ -324,6 +324,13 @@ CV_EXPORTS void erGrouping(InputArray img, InputArrayOfArrays channels,
324324
const std::string& filename = std::string(),
325325
float minProbablity = 0.5);
326326

327+
CV_EXPORTS_W void erGrouping(InputArray image, InputArray channel,
328+
std::vector<std::vector<Point> > regions,
329+
CV_OUT std::vector<Rect> &groups_rects,
330+
int method = ERGROUPING_ORIENTATION_HORIZ,
331+
const String& filename = String(),
332+
float minProbablity = (float)0.5);
333+
327334
/** @brief Converts MSER contours (vector\<Point\>) to ERStat regions.
328335
329336
@param image Source image CV_8UC1 from which the MSERs were extracted.
@@ -343,6 +350,9 @@ An example of MSERsToERStats in use can be found in the text detection webcam_de
343350
CV_EXPORTS void MSERsToERStats(InputArray image, std::vector<std::vector<Point> > &contours,
344351
std::vector<std::vector<ERStat> > &regions);
345352

353+
// Utility function for scripting
354+
CV_EXPORTS_W void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2, CV_OUT std::vector< std::vector<Point> >& regions);
355+
346356
//! @}
347357

348358
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/usr/bin/python
"""Demo: detect character candidates in an image with the two-stage ER filter.

Usage: detect_er_chars.py <path_to_image_to_be_processed>

The trained classifier files (trained_classifierNM1.xml and
trained_classifierNM2.xml) are looked up in the directory of this script.
"""

import sys
import os

import cv2
import numpy as np
from matplotlib import pyplot as plt

print('\ndetect_er_chars.py')
print(' A simple demo script using the Extremal Region Filter algorithm described in:')
print(' Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012\n')


if len(sys.argv) < 2:
    print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
    # sys.exit is always available (quit() is only injected by the site module)
    # and lets us report failure through the exit status.
    sys.exit(1)

# Directory of this script. It is '' when the script is started from its own
# directory, which is why the classifier paths below are built with
# os.path.join: plain concatenation with '/' would point at the filesystem root.
pathname = os.path.dirname(sys.argv[0])

img = cv2.imread(str(sys.argv[1]))
gray = cv2.imread(str(sys.argv[1]), 0)
if img is None or gray is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    print(' (ERROR) Could not read image: ' + str(sys.argv[1]) + '\n')
    sys.exit(1)

# First and second stage classifiers / filters, created with default parameters
erc1 = cv2.text.loadClassifierNM1(os.path.join(pathname, 'trained_classifierNM1.xml'))
er1 = cv2.text.createERFilterNM1(erc1)

erc2 = cv2.text.loadClassifierNM2(os.path.join(pathname, 'trained_classifierNM2.xml'))
er2 = cv2.text.createERFilterNM2(erc2)

regions = cv2.text.detectRegions(gray, er1, er2)

# Visualization: draw the bounding box of every detected region in red (BGR)
rects = [cv2.boundingRect(p.reshape(-1, 1, 2)) for p in regions]
for rect in rects:
    cv2.rectangle(img, rect[0:2], (rect[0]+rect[2], rect[1]+rect[3]), (0, 0, 255), 2)

img = img[:, :, ::-1]  # flip the colors dimension from BGR to RGB
plt.imshow(img)
# hide tick values on X and Y axis
plt.xticks([])
plt.yticks([])
plt.show()

modules/text/samples/textdetection.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#!/usr/bin/python
"""Demo: text detection with the Extremal Region Filter and erGrouping.

Usage: textdetection.py <path_to_image_to_be_processed>

The trained classifier files (trained_classifierNM1.xml and
trained_classifierNM2.xml) are looked up in the directory of this script.
"""

import sys
import os

import cv2
import numpy as np
from matplotlib import pyplot as plt

print('\ntextdetection.py')
print(' A demo script of the Extremal Region Filter algorithm described in:')
print(' Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012\n')


if len(sys.argv) < 2:
    print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
    # sys.exit is always available (quit() is only injected by the site module)
    # and lets us report failure through the exit status.
    sys.exit(1)

# Directory of this script. It is '' when the script is started from its own
# directory, which is why the classifier paths below are built with
# os.path.join: plain concatenation with '/' would point at the filesystem root.
pathname = os.path.dirname(sys.argv[0])


img = cv2.imread(str(sys.argv[1]))
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    print(' (ERROR) Could not read image: ' + str(sys.argv[1]) + '\n')
    sys.exit(1)
# for visualization
vis = img.copy()


# Extract channels to be processed individually
# (list() so that append works even if the binding returns a tuple)
channels = list(cv2.text.computeNMChannels(img))
# Append negative channels to detect ER- (bright regions over dark background).
# The last channel is left out of the inversion, as in the original script.
cn = len(channels)-1
for c in range(0, cn):
    channels.append((255-channels[c]))

# The classifier models do not depend on the channel: load them once
erc1 = cv2.text.loadClassifierNM1(os.path.join(pathname, 'trained_classifierNM1.xml'))
erc2 = cv2.text.loadClassifierNM2(os.path.join(pathname, 'trained_classifierNM2.xml'))

# Apply the default cascade classifier to each independent channel (could be done in parallel)
print("Extracting Class Specific Extremal Regions from "+str(len(channels))+" channels ...")
print(" (...) this may take a while (...)")
for channel in channels:

    # Fresh filters for every channel, as in the original script
    er1 = cv2.text.createERFilterNM1(erc1, 16, 0.00015, 0.13, 0.2, True, 0.1)
    er2 = cv2.text.createERFilterNM2(erc2, 0.5)

    regions = cv2.text.detectRegions(channel, er1, er2)

    rects = cv2.text.erGrouping(img, channel, [r.tolist() for r in regions])
    #rects = cv2.text.erGrouping(img,gray,[x.tolist() for x in regions], cv2.text.ERGROUPING_ORIENTATION_ANY,'../../GSoC2014/opencv_contrib/modules/text/samples/trained_classifier_erGrouping.xml',0.5)

    # Visualization: draw every group rectangle (x, y, width, height) in yellow (BGR)
    for rect in rects:
        cv2.rectangle(vis, (rect[0], rect[1]), (rect[0]+rect[2], rect[1]+rect[3]), (0, 255, 255), 2)


# Visualization
vis = vis[:, :, ::-1]  # flip the colors dimension from BGR to RGB
plt.imshow(vis)
# hide tick values on X and Y axis
plt.xticks([])
plt.yticks([])
plt.show()

modules/text/src/erfilter.cpp

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,7 +1161,7 @@ Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb, float minProb
11611161
The function takes as parameter the XML or YAML file with the classifier model
11621162
(e.g. trained_classifierNM1.xml) returns a pointer to ERFilter::Callback.
11631163
*/
1164-
Ptr<ERFilter::Callback> loadClassifierNM1(const string& filename)
1164+
Ptr<ERFilter::Callback> loadClassifierNM1(const String& filename)
11651165

11661166
{
11671167
return makePtr<ERClassifierNM1>(filename);
@@ -1172,7 +1172,7 @@ Ptr<ERFilter::Callback> loadClassifierNM1(const string& filename)
11721172
The function takes as parameter the XML or YAML file with the classifier model
11731173
(e.g. trained_classifierNM2.xml) returns a pointer to ERFilter::Callback.
11741174
*/
1175-
Ptr<ERFilter::Callback> loadClassifierNM2(const string& filename)
1175+
Ptr<ERFilter::Callback> loadClassifierNM2(const String& filename)
11761176
{
11771177
return makePtr<ERClassifierNM2>(filename);
11781178
}
@@ -1236,7 +1236,7 @@ void get_gradient_magnitude(Mat& _grey_img, Mat& _gradient_magnitude)
12361236
ERFILTER_NM_RGBLGrad and ERFILTER_NM_IHSGrad.
12371237
12381238
*/
1239-
void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode)
1239+
void computeNMChannels(InputArray _src, CV_OUT OutputArrayOfArrays _channels, int _mode)
12401240
{
12411241

12421242
CV_Assert( ( _mode == ERFILTER_NM_RGBLGrad ) || ( _mode == ERFILTER_NM_IHSGrad ) );
@@ -4094,6 +4094,22 @@ void erGrouping(InputArray image, InputArrayOfArrays channels, vector<vector<ERS
40944094

40954095
}
40964096

4097+
/*!
 * Single-channel convenience overload of erGrouping that works on plain contours
 * (vector<Point>) instead of ERStat regions.
 *
 * The contours are converted with MSERsToERStats and the result is forwarded to the
 * multi-channel erGrouping together with a one-element channel list. Only the group
 * rectangles are handed back; the per-group region indexes are discarded.
 *
 * \param image          Original image; must be CV_8UC3.
 * \param channel        Channel the contours belong to; must be CV_8UC1.
 * \param contours       Region contours (taken by value; MSERsToERStats needs a non-const reference).
 * \param groups_rects   Output rectangles, filled by the forwarded erGrouping call.
 * \param method         Grouping method; ERGROUPING_ORIENTATION_ANY requires a non-empty filename.
 * \param filename       Classifier model file, forwarded unchanged.
 * \param minProbability Forwarded unchanged.
 */
void erGrouping(InputArray image, InputArray channel, vector<vector<Point> > contours, CV_OUT std::vector<Rect> &groups_rects, int method, const String& filename, float minProbability)
{
    CV_Assert( image.getMat().type() == CV_8UC3 );
    CV_Assert( channel.getMat().type() == CV_8UC1 );
    CV_Assert( !((method == ERGROUPING_ORIENTATION_ANY) && (filename.empty())) );

    // The multi-channel overload expects a list of channels: wrap the single one
    vector<Mat> single_channel(1, channel.getMat());

    vector<vector<ERStat> > er_regions;
    MSERsToERStats(channel, contours, er_regions);
    // Drop the last region vector before grouping
    // NOTE(review): presumably this leaves one region vector for the one channel - confirm against MSERsToERStats
    er_regions.pop_back();

    // Required by the forwarded call, not exposed to the caller
    std::vector<std::vector<Vec2i> > discarded_groups;

    erGrouping(image, single_channel, er_regions, discarded_groups, groups_rects, method, filename, minProbability);
}
4112+
40974113
/*!
40984114
* MSERsToERStats function converts MSER contours (vector<Point>) to ERStat regions.
40994115
* It takes as input the contours provided by the OpenCV MSER feature detector and returns as output two vectors
@@ -4167,5 +4183,55 @@ void MSERsToERStats(InputArray image, vector<vector<Point> > &contours, vector<v
41674183
}
41684184
}
41694185

4186+
// Utility funtion for scripting
4187+
void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2, CV_OUT vector< vector<Point> >& regions)
4188+
{
4189+
// assert correct image type
4190+
CV_Assert( image.getMat().type() == CV_8UC1 );
4191+
// at least one ERFilter must be passed
4192+
CV_Assert( !er_filter1.empty() );
4193+
4194+
vector<ERStat> ers;
4195+
4196+
er_filter1->run(image, ers);
4197+
4198+
if (!er_filter2.empty())
4199+
{
4200+
er_filter2->run(image, ers);
4201+
}
4202+
4203+
//Convert each ER to vector<Point> and push it to output regions
4204+
Mat src = image.getMat();
4205+
Mat region_mask = Mat::zeros(src.rows+2, src.cols+2, CV_8UC1);
4206+
for (size_t i=1; i < ers.size(); i++) //start from 1 to deprecate root region
4207+
{
4208+
ERStat* stat = &ers[i];
4209+
4210+
//Fill the region and calculate 2nd stage features
4211+
Mat region = region_mask(Rect(Point(stat->rect.x,stat->rect.y),Point(stat->rect.br().x+2,stat->rect.br().y+2)));
4212+
region = Scalar(0);
4213+
int newMaskVal = 255;
4214+
int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
4215+
Rect rect;
4216+
4217+
floodFill( src(Rect(Point(stat->rect.x,stat->rect.y),Point(stat->rect.br().x,stat->rect.br().y))),
4218+
region, Point(stat->pixel%src.cols - stat->rect.x, stat->pixel/src.cols - stat->rect.y),
4219+
Scalar(255), &rect, Scalar(stat->level), Scalar(0), flags );
4220+
rect.width += 2;
4221+
rect.height += 2;
4222+
region = region(rect);
4223+
4224+
vector<vector<Point> > contours;
4225+
vector<Vec4i> hierarchy;
4226+
findContours( region, contours, hierarchy, RETR_TREE, CHAIN_APPROX_NONE, Point(0, 0) );
4227+
4228+
for (size_t j=0; j < contours[0].size(); j++)
4229+
contours[0][j] += (stat->rect.tl()-Point(1,1));
4230+
4231+
regions.push_back(contours[0]);
4232+
}
4233+
4234+
}
4235+
41704236
}
41714237
}

0 commit comments

Comments
 (0)