Skip to content

Commit fe05681

Browse files
committed
text python bindings
1 parent c8053da commit fe05681

File tree

4 files changed

+221
-24
lines changed

4 files changed

+221
-24
lines changed

modules/text/include/opencv2/text/ocr.hpp

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/*M///////////////////////////////////////////////////////////////////////////////////////
1+
/*M//////////////////////////////////////////////////////////////////////////////////////////
22
//
33
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
44
//
@@ -62,7 +62,7 @@ enum
6262
};
6363

6464
//base class BaseOCR declares a common API that would be used in a typical text recognition scenario
65-
class CV_EXPORTS BaseOCR
65+
class CV_EXPORTS_W BaseOCR
6666
{
6767
public:
6868
virtual ~BaseOCR() {};
@@ -86,7 +86,7 @@ Notice that it is compiled only when tesseract-ocr is correctly installed.
8686
found at the webcam_demo:
8787
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
8888
*/
89-
class CV_EXPORTS OCRTesseract : public BaseOCR
89+
class CV_EXPORTS_W OCRTesseract : public BaseOCR
9090
{
9191
public:
9292
/** @brief Recognize text using the tesseract-ocr API.
@@ -113,6 +113,14 @@ class CV_EXPORTS OCRTesseract : public BaseOCR
113113
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
114114
int component_level=0);
115115

116+
// aliases for scripting
117+
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
118+
119+
CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
120+
121+
CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;
122+
123+
116124
/** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract.
117125
118126
@param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
@@ -127,7 +135,7 @@ class CV_EXPORTS OCRTesseract : public BaseOCR
127135
(fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other
128136
possible values.
129137
*/
130-
static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
138+
CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
131139
const char* char_whitelist=NULL, int oem=3, int psmode=3);
132140
};
133141

@@ -146,7 +154,7 @@ enum decoder_mode
146154
be found at the webcam_demo sample:
147155
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
148156
*/
149-
class CV_EXPORTS OCRHMMDecoder : public BaseOCR
157+
class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
150158
{
151159
public:
152160

@@ -159,7 +167,7 @@ class CV_EXPORTS OCRHMMDecoder : public BaseOCR
159167
loadOCRHMMClassifierNM and KNN model provided in
160168
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
161169
*/
162-
class CV_EXPORTS ClassifierCallback
170+
class CV_EXPORTS_W ClassifierCallback
163171
{
164172
public:
165173
virtual ~ClassifierCallback() { }
@@ -227,6 +235,11 @@ class CV_EXPORTS OCRHMMDecoder : public BaseOCR
227235
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
228236
int component_level=0);
229237

238+
// aliases for scripting
239+
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
240+
241+
CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
242+
230243
/** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder.
231244
232245
@param classifier The character classifier with built in feature extractor.
@@ -252,6 +265,15 @@ class CV_EXPORTS OCRHMMDecoder : public BaseOCR
252265
// cols == rows == vocabulary.size()
253266
decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
254267

268+
CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
269+
const String& vocabulary, // The language vocabulary (chars when ascii english text)
270+
// size() must be equal to the number of classes
271+
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
272+
// cols == rows == vocabulary.size()
273+
InputArray emission_probabilities_table, // Table with observation emission probabilities
274+
// cols == rows == vocabulary.size()
275+
int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
276+
255277
protected:
256278

257279
Ptr<OCRHMMDecoder::ClassifierCallback> classifier;
@@ -272,7 +294,8 @@ based on gradient orientations along the chain-code of its perimeter. Then, the
272294
using a KNN model trained with synthetic data of rendered characters with different standard font
273295
types.
274296
*/
275-
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);
297+
298+
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);
276299

277300
/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
278301
@@ -283,7 +306,7 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
283306
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
284307
at each window location.
285308
*/
286-
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename);
309+
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);
287310

288311
//! @}
289312

@@ -299,9 +322,11 @@ CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const
299322
* @note
300323
* - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
301324
* <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
302-
* */
325+
**/
303326
CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
304327

328+
CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);
329+
305330

306331
/* OCR BeamSearch Decoder */
307332

@@ -312,7 +337,7 @@ CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vecto
312337
be found at the demo sample:
313338
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
314339
*/
315-
class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
340+
class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
316341
{
317342
public:
318343

@@ -325,7 +350,7 @@ class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
325350
loadOCRBeamSearchClassifierCNN with all its parameters provided in
326351
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
327352
*/
328-
class CV_EXPORTS ClassifierCallback
353+
class CV_EXPORTS_W ClassifierCallback
329354
{
330355
public:
331356
virtual ~ClassifierCallback() { }
@@ -350,7 +375,7 @@ class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
350375
provides also the Rects for individual text elements found (e.g. words), and the list of those
351376
text elements with their confidence values.
352377
353-
@param image Input image CV_8UC1 with a single text line (or word).
378+
@param image Input binary image CV_8UC1 with a single text line (or word).
354379
355380
@param output_text Output text. Most likely character sequence found by the HMM decoder.
356381
@@ -373,6 +398,11 @@ class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
373398
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
374399
int component_level=0);
375400

401+
// aliases for scripting
402+
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
403+
404+
CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
405+
376406
/** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder.
377407
378408
@param classifier The character classifier with built in feature extractor.
@@ -401,6 +431,16 @@ class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
401431
decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
402432
int beam_size = 500); // Size of the beam in Beam Search algorithm
403433

434+
CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier, // The character classifier with built in feature extractor
435+
const String& vocabulary, // The language vocabulary (chars when ascii english text)
436+
// size() must be equal to the number of classes
437+
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
438+
// cols == rows == vocabulary.size()
439+
InputArray emission_probabilities_table, // Table with observation emission probabilities
440+
// cols == rows == vocabulary.size()
441+
int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
442+
int beam_size = 500); // Size of the beam in Beam Search algorithm
443+
404444
protected:
405445

406446
Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
@@ -420,7 +460,8 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
420460
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
421461
at each window location.
422462
*/
423-
CV_EXPORTS Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename);
463+
464+
CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
424465

425466
//! @}
426467

modules/text/src/ocr_beamsearch_decoder.cpp

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,45 @@ void OCRBeamSearchDecoder::run(Mat& image, Mat& mask, string& output_text, vecto
8888
component_confidences->clear();
8989
}
9090

91+
CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, int min_confidence, int component_level)
92+
{
93+
std::string output1;
94+
std::string output2;
95+
vector<string> component_texts;
96+
vector<float> component_confidences;
97+
Mat image_m = image.getMat();
98+
run(image_m, output1, NULL, &component_texts, &component_confidences, component_level);
99+
for(unsigned int i = 0; i < component_texts.size(); i++)
100+
{
101+
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
102+
if(component_confidences[i] > min_confidence)
103+
{
104+
output2 += component_texts[i];
105+
}
106+
}
107+
return String(output2);
108+
}
109+
110+
CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
111+
{
112+
std::string output1;
113+
std::string output2;
114+
vector<string> component_texts;
115+
vector<float> component_confidences;
116+
Mat image_m = image.getMat();
117+
Mat mask_m = mask.getMat();
118+
run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
119+
for(unsigned int i = 0; i < component_texts.size(); i++)
120+
{
121+
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
122+
if(component_confidences[i] > min_confidence)
123+
{
124+
output2 += component_texts[i];
125+
}
126+
}
127+
return String(output2);
128+
}
129+
91130

92131
void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation)
93132
{
@@ -460,6 +499,16 @@ Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create( Ptr<OCRBeamSearchDecoder
460499
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size);
461500
}
462501

502+
CV_EXPORTS_W Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create(Ptr<OCRBeamSearchDecoder::ClassifierCallback> _classifier,
503+
const String& _vocabulary,
504+
InputArray transition_p,
505+
InputArray emission_p,
506+
int _mode,
507+
int _beam_size)
508+
{
509+
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode, _beam_size);
510+
}
511+
463512

464513
class CV_EXPORTS OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback
465514
{
@@ -727,11 +776,10 @@ double OCRBeamSearchClassifierCNN::eval_feature(Mat& feature, double* prob_estim
727776
return dec_max_idx;
728777
}
729778

730-
731-
Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename)
779+
// Factory for the CNN character classifier used with OCRBeamSearchDecoder.
// Constructs an OCRBeamSearchClassifierCNN from `filename` and returns it through
// the ClassifierCallback base-class pointer. The cv::String argument is converted
// to std::string explicitly before being passed to the constructor.
// (The two return statements below are the removed and the added side of the diff.)
Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename)
732780

733781
{
734-
return makePtr<OCRBeamSearchClassifierCNN>(filename);
782+
return makePtr<OCRBeamSearchClassifierCNN>(std::string(filename));
735783
}
736784

737785
}

modules/text/src/ocr_hmm_decoder.cpp

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,46 @@ void OCRHMMDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect>
9090
component_confidences->clear();
9191
}
9292

93+
CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int component_level)
94+
{
95+
std::string output1;
96+
std::string output2;
97+
vector<string> component_texts;
98+
vector<float> component_confidences;
99+
Mat image_m = image.getMat();
100+
run(image_m, output1, NULL, &component_texts, &component_confidences, component_level);
101+
for(unsigned int i = 0; i < component_texts.size(); i++)
102+
{
103+
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
104+
if(component_confidences[i] > min_confidence)
105+
{
106+
output2 += component_texts[i];
107+
}
108+
}
109+
return String(output2);
110+
}
111+
112+
CV_WRAP cv::String OCRHMMDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
113+
{
114+
std::string output1;
115+
std::string output2;
116+
vector<string> component_texts;
117+
vector<float> component_confidences;
118+
Mat image_m = image.getMat();
119+
Mat mask_m = mask.getMat();
120+
run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
121+
for(unsigned int i = 0; i < component_texts.size(); i++)
122+
{
123+
cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
124+
125+
if(component_confidences[i] > min_confidence)
126+
{
127+
output2 += component_texts[i];
128+
}
129+
}
130+
return String(output2);
131+
}
132+
93133
void OCRHMMDecoder::ClassifierCallback::eval( InputArray image, vector<int>& out_class, vector<double>& out_confidence)
94134
{
95135
CV_Assert(( image.getMat().type() == CV_8UC3 ) || ( image.getMat().type() == CV_8UC1 ));
@@ -635,6 +675,16 @@ Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback>
635675
}
636676

637677

678+
/** @brief Scripting-friendly factory that takes the decoding mode as a plain int.

Builds an OCRHMMDecoderImpl from the given arguments. `_mode` is converted to
decoder_mode without any range check.

@param _classifier Character classifier callback handed to the implementation.
@param _vocabulary Vocabulary string handed to the implementation.
@param transition_p Transition probabilities table.
@param emission_p Emission probabilities table.
@param _mode Decoding mode as an int; converted to decoder_mode.
@return Pointer to the newly created decoder.
*/
Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback> _classifier,
                                          const String& _vocabulary,
                                          InputArray transition_p,
                                          InputArray emission_p,
                                          int _mode)
{
    const decoder_mode decoding = static_cast<decoder_mode>(_mode);
    return makePtr<OCRHMMDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, decoding);
}
686+
687+
638688
class CV_EXPORTS OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback
639689
{
640690
public:
@@ -867,14 +917,12 @@ void OCRHMMClassifierKNN::eval( InputArray _mask, vector<int>& out_class, vector
867917
}
868918

869919

870-
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename)
920+
// Factory for the KNN-based character classifier used with OCRHMMDecoder.
// Constructs an OCRHMMClassifierKNN from `filename` and returns it through the
// ClassifierCallback base-class pointer. The cv::String argument is converted
// to std::string explicitly before being passed to the constructor.
// (The two return statements below are the removed and the added side of the diff.)
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename)
871921

872922
{
873-
return makePtr<OCRHMMClassifierKNN>(filename);
923+
return makePtr<OCRHMMClassifierKNN>(std::string(filename));
874924
}
875925

876-
877-
878926
class CV_EXPORTS OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback
879927
{
880928
public:
@@ -1139,10 +1187,10 @@ double OCRHMMClassifierCNN::eval_feature(Mat& feature, double* prob_estimates)
11391187
}
11401188

11411189

1142-
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename)
1190+
// Factory for the CNN-based character classifier used with OCRHMMDecoder.
// Constructs an OCRHMMClassifierCNN from `filename` and returns it through the
// ClassifierCallback base-class pointer. The cv::String argument is converted
// to std::string explicitly before being passed to the constructor.
// (The two return statements below are the removed and the added side of the diff.)
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename)
11431191

11441192
{
1145-
return makePtr<OCRHMMClassifierCNN>(filename);
1193+
return makePtr<OCRHMMClassifierCNN>(std::string(filename));
11461194
}
11471195

11481196
/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
@@ -1201,5 +1249,17 @@ void createOCRHMMTransitionsTable(string& vocabulary, vector<string>& lexicon, O
12011249
return;
12021250
}
12031251

1252+
/** @brief Scripting-friendly wrapper that returns the transitions table instead of filling an output argument.

Copies `vocabulary` and every entry of `lexicon` into std::string objects, calls
the three-argument createOCRHMMTransitionsTable() overload with them, and
returns the matrix that overload produced.

@param vocabulary Vocabulary string; passed on as a std::string copy.
@param lexicon List of words; passed on as std::string copies, `lexicon` itself is not modified here.
@return The transition probabilities table written by the three-argument overload.
*/
Mat createOCRHMMTransitionsTable(const String& vocabulary, vector<cv::String>& lexicon)
{
    // The three-argument overload takes non-const std::string references,
    // so both inputs are copied into mutable std::string objects.
    std::string voc(vocabulary);

    std::vector<std::string> lex;
    lex.reserve(lexicon.size());                // final size is known: avoid repeated reallocation
    for (std::vector<cv::String>::const_iterator word = lexicon.begin(); word != lexicon.end(); ++word)
        lex.push_back(std::string(*word));

    Mat _transitions;
    createOCRHMMTransitionsTable(voc, lex, _transitions);
    return _transitions;
}
1263+
12041264
}
12051265
}

0 commit comments

Comments
 (0)