text python bindings

previ · previ · commit fe0568162705 · 2015-11-06T20:00:28.000Z
diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp
@@ -1,4 +1,4 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
+/*M//////////////////////////////////////////////////////////////////////////////////////////
 //
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
 //
@@ -62,7 +62,7 @@ enum
 };
 
 //base class BaseOCR declares a common API that would be used in a typical text recognition scenario
-class CV_EXPORTS BaseOCR
+class CV_EXPORTS_W BaseOCR
 {
 public:
     virtual ~BaseOCR() {};
@@ -86,7 +86,7 @@ Notice that it is compiled only when tesseract-ocr is correctly installed.
         found at the webcam_demo:
         <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
  */
-class CV_EXPORTS OCRTesseract : public BaseOCR
+class CV_EXPORTS_W OCRTesseract : public BaseOCR
 {
 public:
     /** @brief Recognize text using the tesseract-ocr API.
@@ -113,6 +113,14 @@ class CV_EXPORTS OCRTesseract : public BaseOCR
                      std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                      int component_level=0);
 
+    // aliases for scripting: String-returning run() overloads tagged CV_WRAP
+    CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
+    // NOTE(review): same alias with an extra mask argument; the implementation is not visible in this header
+    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
+    // NOTE(review): pure virtual, so concrete subclasses must implement it; presumably replaces the character whitelist - confirm in ocr_tesseract.cpp
+    CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;
+
+
     /** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract.
 
     @param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
@@ -127,7 +135,7 @@ class CV_EXPORTS OCRTesseract : public BaseOCR
     (fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other
     possible values.
      */
-    static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
+    CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
                                     const char* char_whitelist=NULL, int oem=3, int psmode=3);
 };
 
@@ -146,7 +154,7 @@ enum decoder_mode
         be found at the webcam_demo sample:
         <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
  */
-class CV_EXPORTS OCRHMMDecoder : public BaseOCR
+class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
 {
 public:
 
@@ -159,7 +167,7 @@ class CV_EXPORTS OCRHMMDecoder : public BaseOCR
     loadOCRHMMClassifierNM and KNN model provided in
     <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
      */
-    class CV_EXPORTS ClassifierCallback
+    class CV_EXPORTS_W ClassifierCallback
     {
     public:
         virtual ~ClassifierCallback() { }
@@ -227,6 +235,11 @@ class CV_EXPORTS OCRHMMDecoder : public BaseOCR
                      std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                      int component_level=0);
 
+    // aliases for scripting: return the concatenated component texts whose confidence is greater than min_confidence
+    CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
+    // same filtering as the alias above, with the mask forwarded to the mask-taking run() overload
+    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
+
     /** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder.
 
     @param classifier The character classifier with built in feature extractor.
@@ -252,6 +265,15 @@ class CV_EXPORTS OCRHMMDecoder : public BaseOCR
                                                                                        //     cols == rows == vocabulari.size()
                                      decoder_mode mode = OCR_DECODER_VITERBI);         // HMM Decoding algorithm (only Viterbi for the moment)
 
+    CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built-in feature extractor
+                                     const String& vocabulary,                    // The language vocabulary (chars when ASCII English text)
+                                                                                       //     size() must be equal to the number of classes
+                                     InputArray transition_probabilities_table,        // Table with transition probabilities between character pairs
+                                                                                       //     cols == rows == vocabulary.size()
+                                     InputArray emission_probabilities_table,          // Table with observation emission probabilities
+                                                                                       //     cols == rows == vocabulary.size()
+                                     int mode = OCR_DECODER_VITERBI);         // HMM decoding algorithm (only Viterbi for the moment); the int is cast to decoder_mode
+
 protected:
 
     Ptr<OCRHMMDecoder::ClassifierCallback> classifier;
@@ -272,7 +294,8 @@ based on gradient orientations along the chain-code of its perimeter. Then, the
 using a KNN model trained with synthetic data of rendered characters with different standard font
 types.
  */
-CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);
+
+CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);
 
 /** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
 
@@ -283,7 +306,7 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
 a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
 at each window location.
  */
-CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename);
+CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);
 
 //! @}
 
@@ -299,9 +322,11 @@ CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const
  * @note
  *    -   (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
  *            <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
- *             */
+ **/
 CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
 
+CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);
+
 
 /* OCR BeamSearch Decoder */
 
@@ -312,7 +337,7 @@ CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vecto
         be found at the demo sample:
         <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
  */
-class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
+class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
 {
 public:
 
@@ -325,7 +350,7 @@ class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
     loadOCRBeamSearchClassifierCNN with all its parameters provided in
     <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
      */
-    class CV_EXPORTS ClassifierCallback
+    class CV_EXPORTS_W ClassifierCallback
     {
     public:
         virtual ~ClassifierCallback() { }
@@ -350,7 +375,7 @@ class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
     provides also the Rects for individual text elements found (e.g. words), and the list of those
     text elements with their confidence values.
 
-    @param image Input image CV_8UC1 with a single text line (or word).
+    @param image Input binary image CV_8UC1 with a single text line (or word).
 
     @param output_text Output text. Most likely character sequence found by the HMM decoder.
 
@@ -373,6 +398,11 @@ class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
                      std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                      int component_level=0);
 
+    // aliases for scripting: return the concatenated component texts whose confidence is greater than min_confidence
+    CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
+    // same filtering as the alias above, with the mask forwarded to the mask-taking run() overload
+    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
+
     /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder.
 
     @param classifier The character classifier with built in feature extractor.
@@ -401,6 +431,16 @@ class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
                                      decoder_mode mode = OCR_DECODER_VITERBI,          // HMM Decoding algorithm (only Viterbi for the moment)
                                      int beam_size = 500);                              // Size of the beam in Beam Search algorithm
 
+    CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier, // The character classifier with built-in feature extractor
+                                     const String& vocabulary,                    // The language vocabulary (chars when ASCII English text)
+                                                                                       //     size() must be equal to the number of classes
+                                     InputArray transition_probabilities_table,        // Table with transition probabilities between character pairs
+                                                                                       //     cols == rows == vocabulary.size()
+                                     InputArray emission_probabilities_table,          // Table with observation emission probabilities
+                                                                                       //     cols == rows == vocabulary.size()
+                                     int mode = OCR_DECODER_VITERBI,          // HMM decoding algorithm (only Viterbi for the moment); the int is cast to decoder_mode
+                                     int beam_size = 500);                              // Size of the beam in the Beam Search algorithm
+
 protected:
 
     Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
@@ -420,7 +460,8 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
 a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
 at each window location.
  */
-CV_EXPORTS Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename);
+
+CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
 
 //! @}
 
diff --git a/modules/text/src/ocr_beamsearch_decoder.cpp b/modules/text/src/ocr_beamsearch_decoder.cpp
@@ -88,6 +88,45 @@ void OCRBeamSearchDecoder::run(Mat& image, Mat& mask, string& output_text, vecto
         component_confidences->clear();
 }
 
+// Scripting alias: decodes the image and returns, concatenated in order, only the
+// component texts whose confidence is strictly greater than min_confidence.
+CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, int min_confidence, int component_level)
+{
+    Mat input = image.getMat();
+    std::string decoded;      // output_text of the underlying run(); not returned
+    vector<string> texts;
+    vector<float> scores;
+    run(input, decoded, NULL, &texts, &scores, component_level);
+    std::string accepted;
+    for(unsigned int k = 0; k < texts.size(); ++k)
+    {
+        if(!(scores[k] > min_confidence))
+            continue;
+        accepted += texts[k];
+    }
+    return String(accepted);
+}
+
+// Scripting alias (masked variant): decodes the image with the given mask and returns,
+// concatenated in order, only the component texts whose confidence exceeds min_confidence.
+CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
+{
+    Mat input = image.getMat();
+    Mat input_mask = mask.getMat();
+    std::string decoded;      // output_text of the underlying run(); not returned
+    vector<string> texts;
+    vector<float> scores;
+    run(input, input_mask, decoded, NULL, &texts, &scores, component_level);
+    std::string accepted;
+    for(unsigned int k = 0; k < texts.size(); ++k)
+    {
+        if(!(scores[k] > min_confidence))
+            continue;
+        accepted += texts[k];
+    }
+    return String(accepted);
+}
+
 
 void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation)
 {
@@ -460,6 +499,16 @@ Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create( Ptr<OCRBeamSearchDecoder
     return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size);
 }
 
+// Scripting-friendly overload: takes the decoding mode as a plain int and converts it
+// to decoder_mode before constructing the OCRBeamSearchDecoderImpl instance.
+Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create(Ptr<OCRBeamSearchDecoder::ClassifierCallback> _classifier,
+                                                        const String& _vocabulary, InputArray transition_p,
+                                                        InputArray emission_p, int _mode, int _beam_size)
+{
+    const decoder_mode decoding_mode = static_cast<decoder_mode>(_mode);
+    return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, decoding_mode, _beam_size);
+}
+
 
 class CV_EXPORTS OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback
 {
@@ -727,11 +776,10 @@ double OCRBeamSearchClassifierCNN::eval_feature(Mat& feature, double* prob_estim
     return dec_max_idx;
 }
 
-
-Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename)
+Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename)
 
 {
-    return makePtr<OCRBeamSearchClassifierCNN>(filename);
+    return makePtr<OCRBeamSearchClassifierCNN>(std::string(filename)); // filename is now a String; convert it explicitly so the classifier still receives a std::string
 }
 
 }
diff --git a/modules/text/src/ocr_hmm_decoder.cpp b/modules/text/src/ocr_hmm_decoder.cpp
@@ -90,6 +90,46 @@ void OCRHMMDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect>
         component_confidences->clear();
 }
 
+// Scripting alias: decodes the image and returns, concatenated in order, only the
+// component texts whose confidence is strictly greater than min_confidence.
+CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int component_level)
+{
+    Mat input = image.getMat();
+    std::string decoded;      // output_text of the underlying run(); not returned
+    vector<string> texts;
+    vector<float> scores;
+    run(input, decoded, NULL, &texts, &scores, component_level);
+    std::string accepted;
+    for(unsigned int k = 0; k < texts.size(); ++k)
+    {
+        if(!(scores[k] > min_confidence))
+            continue;
+        accepted += texts[k];
+    }
+    return String(accepted);
+}
+
+// Scripting alias (masked variant): runs the mask-taking decoder overload and returns the
+// concatenation of all component texts whose confidence is strictly greater than min_confidence.
+CV_WRAP cv::String OCRHMMDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
+{
+    std::string output1;   // output_text of the underlying run(); not returned
+    std::string output2;   // filtered result built from the accepted components
+    vector<string> component_texts;
+    vector<float> component_confidences;
+    Mat image_m = image.getMat();
+    Mat mask_m = mask.getMat();
+    run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
+    for(unsigned int i = 0; i < component_texts.size(); i++)
+    {
+        if(component_confidences[i] > min_confidence)
+        {
+            output2 += component_texts[i];
+        }
+    }
+    return String(output2);
+}
+
 void OCRHMMDecoder::ClassifierCallback::eval( InputArray image, vector<int>& out_class, vector<double>& out_confidence)
 {
     CV_Assert(( image.getMat().type() == CV_8UC3 ) || ( image.getMat().type() == CV_8UC1 ));
@@ -635,6 +675,16 @@ Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback>
 }
 
 
+// Scripting-friendly overload: takes the decoding mode as a plain int and converts it
+// to decoder_mode before constructing the OCRHMMDecoderImpl instance.
+Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback> _classifier, const String& _vocabulary,
+                                          InputArray transition_p, InputArray emission_p, int _mode)
+{
+    const decoder_mode decoding_mode = static_cast<decoder_mode>(_mode);
+    return makePtr<OCRHMMDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, decoding_mode);
+}
+
+
 class CV_EXPORTS OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback
 {
 public:
@@ -867,14 +917,12 @@ void OCRHMMClassifierKNN::eval( InputArray _mask, vector<int>& out_class, vector
 }
 
 
-Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename)
+Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename)
 
 {
-    return makePtr<OCRHMMClassifierKNN>(filename);
+    return makePtr<OCRHMMClassifierKNN>(std::string(filename)); // filename is now a String; convert it explicitly so the classifier still receives a std::string
 }
 
-
-
 class CV_EXPORTS OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback
 {
 public:
@@ -1139,10 +1187,10 @@ double OCRHMMClassifierCNN::eval_feature(Mat& feature, double* prob_estimates)
 }
 
 
-Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename)
+Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename)
 
 {
-    return makePtr<OCRHMMClassifierCNN>(filename);
+    return makePtr<OCRHMMClassifierCNN>(std::string(filename)); // filename is now a String; convert it explicitly so the classifier still receives a std::string
 }
 
 /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
@@ -1201,5 +1249,17 @@ void createOCRHMMTransitionsTable(string& vocabulary, vector<string>& lexicon, O
     return;
 }
 
+// Scripting overload: converts the cv::String inputs to std::string, delegates to the OutputArray overload, returns the table.
+Mat createOCRHMMTransitionsTable(const String& vocabulary, vector<cv::String>& lexicon)
+{
+    std::string std_vocabulary(vocabulary);
+    vector<string> std_lexicon;
+    for(size_t w = 0; w < lexicon.size(); ++w)
+        std_lexicon.push_back(std::string(lexicon[w]));
+    Mat table;
+    createOCRHMMTransitionsTable(std_vocabulary, std_lexicon, table);
+    return table;
+}
+
 }
 }
diff --git a/modules/text/src/ocr_tesseract.cpp b/modules/text/src/ocr_tesseract.cpp