Diffstat (limited to 'thirdparty/linux/include/opencv2/text/ocr.hpp')
-rw-r--r-- | thirdparty/linux/include/opencv2/text/ocr.hpp | 470 |
1 file changed, 470 insertions, 0 deletions
diff --git a/thirdparty/linux/include/opencv2/text/ocr.hpp b/thirdparty/linux/include/opencv2/text/ocr.hpp
new file mode 100644
index 0000000..1261046
--- /dev/null
+++ b/thirdparty/linux/include/opencv2/text/ocr.hpp
@@ -0,0 +1,470 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_TEXT_OCR_HPP__
+#define __OPENCV_TEXT_OCR_HPP__
+
+#include <vector>
+#include <string>
+
+namespace cv
+{
+namespace text
+{
+
+//! @addtogroup text_recognize
+//! @{
+
+enum
+{
+    OCR_LEVEL_WORD,
+    OCR_LEVEL_TEXTLINE
+};
+
+// Base class BaseOCR declares a common API that would be used in a typical text recognition scenario
+class CV_EXPORTS_W BaseOCR
+{
+public:
+    virtual ~BaseOCR() {};
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0) = 0;
+    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0) = 0;
+};
+
+/** @brief OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++.
+
+Notice that it is compiled only when tesseract-ocr is correctly installed.
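+
+A minimal usage sketch, assuming tesseract-ocr and its "eng" language data are installed
+("scene.png" is a placeholder input image):
+
+@code{.cpp}
+    // Requires <opencv2/text.hpp> and <opencv2/imgcodecs.hpp>.
+    Mat image = imread("scene.png");                 // CV_8UC3 scene image
+    Ptr<OCRTesseract> ocr = OCRTesseract::create();  // default datapath, "eng", default OEM/PSM
+
+    std::string text;
+    std::vector<Rect>        boxes;
+    std::vector<std::string> words;
+    std::vector<float>       confidences;
+    ocr->run(image, text, &boxes, &words, &confidences, OCR_LEVEL_WORD);
+@endcode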
+
+@note
+    - (C++) An example of OCRTesseract recognition combined with scene text detection can be found
+      at the end_to_end_recognition demo:
+      <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp>
+    - (C++) Another example of OCRTesseract recognition combined with scene text detection can be
+      found at the webcam_demo:
+      <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
+ */
+class CV_EXPORTS_W OCRTesseract : public BaseOCR
+{
+public:
+    /** @brief Recognize text using the tesseract-ocr API.
+
+    Takes image on input and returns recognized text in the output_text parameter. Optionally
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
+    text elements with their confidence values.
+
+    @param image Input image CV_8UC1 or CV_8UC3
+    @param output_text Output text of the tesseract-ocr.
+    @param component_rects If provided the method will output a list of Rects for the individual
+    text elements found (e.g. words or text lines).
+    @param component_texts If provided the method will output a list of text strings for the
+    recognition of individual text elements found (e.g. words or text lines).
+    @param component_confidences If provided the method will output a list of confidence values
+    for the recognition of individual text elements found (e.g. words or text lines).
+    @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE.
+    */
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0);
+
+    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0);
+
+    // aliases for scripting
+    CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
+
+    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
+
+    CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;
+
+
+    /** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract.
+
+    @param datapath the name of the parent directory of tessdata ending with "/", or NULL to use
+    the system's default directory.
+    @param language an ISO 639-3 code; NULL defaults to "eng".
+    @param char_whitelist specifies the list of characters used for recognition. NULL defaults to
+    "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
+    @param oem tesseract-ocr offers different OCR Engine Modes (OEM); by default
+    tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible
+    values.
+    @param psmode tesseract-ocr offers different Page Segmentation Modes (PSM); by default
+    tesseract::PSM_AUTO (fully automatic layout analysis) is used. See the tesseract-ocr API
+    documentation for other possible values.
+    */
+    CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
+                                            const char* char_whitelist=NULL, int oem=3, int psmode=3);
+};
+
+
+/* OCR HMM Decoder */
+
+enum decoder_mode
+{
+    OCR_DECODER_VITERBI = 0 // Other algorithms may be added
+};
+
+/** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models.
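+
+A minimal usage sketch (the vocabulary, lexicon and file name below are placeholders; the
+vocabulary must list the classes of the classifier actually used, in the same order):
+
+@code{.cpp}
+    // One character per class of the classifier (order must match the classifier).
+    std::string vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+
+    // Tailor the language model to the words expected in the image.
+    std::vector<std::string> lexicon;
+    lexicon.push_back("opencv");
+    lexicon.push_back("text");
+    Mat transition_p;
+    createOCRHMMTransitionsTable(vocabulary, lexicon, transition_p);
+
+    // Identity emission matrix: the classifier confidences are used directly.
+    Mat emission_p = Mat::eye((int)vocabulary.size(), (int)vocabulary.size(), CV_64FC1);
+
+    Ptr<OCRHMMDecoder> ocr = OCRHMMDecoder::create(
+            loadOCRHMMClassifierNM("OCRHMM_knn_model_data.xml.gz"),
+            vocabulary, transition_p, emission_p);
+
+    Mat word_image = imread("word.png", IMREAD_GRAYSCALE);  // single word, assumed already binarized
+    std::string output;
+    ocr->run(word_image, output);
+@endcode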
+
+@note
+    - (C++) An example on using OCRHMMDecoder recognition combined with scene text detection can
+      be found at the webcam_demo sample:
+      <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
+ */
+class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
+{
+public:
+
+    /** @brief The character classifier callback is wrapped in a class.
+
+    This way it hides the feature extractor and the classifier itself, so developers can write
+    their own OCR code (a minimal sketch of a custom callback follows the class definition below).
+
+    The default character classifier and feature extractor can be loaded using the utility function
+    loadOCRHMMClassifierNM and the KNN model provided in
+    <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
+    */
+    class CV_EXPORTS_W ClassifierCallback
+    {
+    public:
+        virtual ~ClassifierCallback() { }
+        /** @brief The character classifier must return a (ranked list of) class id(s).
+
+        @param image Input image CV_8UC1 or CV_8UC3 with a single letter.
+        @param out_class The classifier returns the character class categorical label, or list of
+        class labels, to which the input image corresponds.
+        @param out_confidence The classifier returns the probability of the input image
+        corresponding to each class in out_class.
+        */
+        virtual void eval( InputArray image, std::vector<int>& out_class, std::vector<double>& out_confidence);
+    };
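+
+    /** A minimal sketch of a user-provided classifier (for illustration only: every input is
+    labelled as class 0 with full confidence; a real implementation would extract features from
+    the image and rank the character classes of the decoder vocabulary):
+    @code{.cpp}
+        class DummyClassifier : public OCRHMMDecoder::ClassifierCallback
+        {
+        public:
+            virtual void eval(InputArray image, std::vector<int>& out_class,
+                              std::vector<double>& out_confidence)
+            {
+                (void)image;                    // a real classifier analyses the image here
+                out_class.assign(1, 0);         // index into the decoder vocabulary
+                out_confidence.assign(1, 1.0);
+            }
+        };
+        // The callback is then passed to the decoder, e.g.:
+        // OCRHMMDecoder::create(makePtr<DummyClassifier>(), vocabulary, transition_p, emission_p);
+    @endcode
+    */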
+
+public:
+    /** @brief Recognize text using HMM.
+
+    Takes binary image on input and returns recognized text in the output_text parameter. Optionally
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
+    text elements with their confidence values.
+
+    @param image Input binary image CV_8UC1 with a single text line (or word).
+
+    @param output_text Output text. Most likely character sequence found by the HMM decoder.
+
+    @param component_rects If provided the method will output a list of Rects for the individual
+    text elements found (e.g. words).
+
+    @param component_texts If provided the method will output a list of text strings for the
+    recognition of individual text elements found (e.g. words).
+
+    @param component_confidences If provided the method will output a list of confidence values
+    for the recognition of individual text elements found (e.g. words).
+
+    @param component_level Only OCR_LEVEL_WORD is supported.
+    */
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0);
+
+    /** @brief Recognize text using HMM.
+
+    Takes an image and a mask (where each connected component corresponds to a segmented character)
+    on input and returns recognized text in the output_text parameter. Optionally
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
+    text elements with their confidence values.
+
+    @param image Input image CV_8UC1 or CV_8UC3 with a single text line (or word).
+
+    @param mask Input binary image CV_8UC1 of the same size as the input image. Each connected
+    component in the mask corresponds to a segmented character in the input image.
+
+    @param output_text Output text. Most likely character sequence found by the HMM decoder.
+
+    @param component_rects If provided the method will output a list of Rects for the individual
+    text elements found (e.g. words).
+
+    @param component_texts If provided the method will output a list of text strings for the
+    recognition of individual text elements found (e.g. words).
+
+    @param component_confidences If provided the method will output a list of confidence values
+    for the recognition of individual text elements found (e.g. words).
+
+    @param component_level Only OCR_LEVEL_WORD is supported.
+    */
+    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0);
+
+    // aliases for scripting
+    CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
+
+    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
+
+    /** @brief Creates an instance of the OCRHMMDecoder class. Initializes the HMM decoder.
+
+    @param classifier The character classifier with built-in feature extractor.
+
+    @param vocabulary The language vocabulary (chars when ASCII English text). vocabulary.size()
+    must be equal to the number of classes of the classifier.
+
+    @param transition_probabilities_table Table with transition probabilities between character
+    pairs. cols == rows == vocabulary.size().
+
+    @param emission_probabilities_table Table with observation emission probabilities. cols ==
+    rows == vocabulary.size().
+
+    @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment
+    (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
+    */
+    static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier, // The character classifier with built-in feature extractor
+                                     const std::string& vocabulary,              // The language vocabulary (chars when ASCII English text)
+                                                                                 // size() must be equal to the number of classes
+                                     InputArray transition_probabilities_table,  // Table with transition probabilities between character pairs
+                                                                                 // cols == rows == vocabulary.size()
+                                     InputArray emission_probabilities_table,    // Table with observation emission probabilities
+                                                                                 // cols == rows == vocabulary.size()
+                                     decoder_mode mode = OCR_DECODER_VITERBI);   // HMM Decoding algorithm (only Viterbi for the moment)
+
+    CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier, // The character classifier with built-in feature extractor
+                                     const String& vocabulary,                   // The language vocabulary (chars when ASCII English text)
+                                                                                 // size() must be equal to the number of classes
+                                     InputArray transition_probabilities_table,  // Table with transition probabilities between character pairs
+                                                                                 // cols == rows == vocabulary.size()
+                                     InputArray emission_probabilities_table,    // Table with observation emission probabilities
+                                                                                 // cols == rows == vocabulary.size()
+                                     int mode = OCR_DECODER_VITERBI);            // HMM Decoding algorithm (only Viterbi for the moment)
+
+protected:
+
+    Ptr<OCRHMMDecoder::ClassifierCallback> classifier;
+    std::string vocabulary;
+    Mat transition_p;
+    Mat emission_p;
+    decoder_mode mode;
+};
+
+/** @brief Allows the default character classifier to be loaded implicitly when creating an OCRHMMDecoder object.
+
+@param filename The XML or YAML file with the classifier model (e.g. OCRHMM_knn_model_data.xml)
+
+The KNN default classifier is based on the scene text recognition method proposed by Lukáš Neumann &
+Jiří Matas in [Neumann11b].
+Basically, the region (contour) in the input image is normalized to a fixed size, while retaining
+the centroid and aspect ratio, in order to extract a feature vector based on gradient orientations
+along the chain-code of its perimeter. Then, the region is classified using a KNN model trained
+with synthetic data of rendered characters with different standard font types.
+ */
+
+CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);
+
+/** @brief Allows the default character classifier to be loaded implicitly when creating an OCRHMMDecoder object.
+
+@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)
+
+The CNN default classifier is based on the scene text recognition method proposed by Adam Coates &
+Andrew Ng in [Coates11a]. The character classifier consists of a single-layer convolutional neural
+network and a linear classifier. It is applied to the input image in a sliding window fashion,
+providing a set of recognitions at each window location.
+ */
+CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);
+
+//! @}
+
+/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
+ *
+ * @param vocabulary The language vocabulary (chars when ASCII English text).
+ *
+ * @param lexicon The list of words that are expected to be found in a particular image.
+ *
+ * @param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size().
+ *
+ * The function calculates frequency statistics of character pairs from the given lexicon and fills
+ * the output transition_probabilities_table with them. The transition_probabilities_table can be
+ * used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
+ * @note
+ *    - (C++) An alternative would be to load the default generic language transition table provided
+ *      in the text module samples folder (created from the ispell 42869-word English word list):
+ *      <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
+ **/
+CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
+
+CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);
+
+
+/* OCR BeamSearch Decoder */
+
+/** @brief OCRBeamSearchDecoder class provides an interface for OCR using the Beam Search algorithm.
+
+@note
+    - (C++) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can
+      be found at the demo sample:
+      <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
+ */
+class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
+{
+public:
+
+    /** @brief The character classifier callback is wrapped in a class.
+
+    This way it hides the feature extractor and the classifier itself, so developers can write
+    their own OCR code.
+
+    The default character classifier and feature extractor can be loaded using the utility function
+    loadOCRBeamSearchClassifierCNN with all its parameters provided in
+    <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
+    */
+    class CV_EXPORTS_W ClassifierCallback
+    {
+    public:
+        virtual ~ClassifierCallback() { }
+        /** @brief The character classifier must return a (ranked list of) class id(s).
+
+        @param image Input image CV_8UC1 or CV_8UC3 with a single letter.
+        @param recognition_probabilities For each of the N characters found the classifier returns a list with
+        class probabilities for each class.
+        @param oversegmentation The classifier returns a list of N+1 character locations' x-coordinates,
+        including 0 as the start-sequence location.
+        */
+        virtual void eval( InputArray image, std::vector< std::vector<double> >& recognition_probabilities, std::vector<int>& oversegmentation );
+
+        int getWindowSize() {return 0;}
+        int getStepSize() {return 0;}
+    };
+
+public:
+    /** @brief Recognize text using Beam Search.
+
+    Takes image on input and returns recognized text in the output_text parameter. Optionally
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
+    text elements with their confidence values.
+
+    @param image Input binary image CV_8UC1 with a single text line (or word).
+
+    @param output_text Output text. Most likely character sequence found by the beam search decoder.
+
+    @param component_rects If provided the method will output a list of Rects for the individual
+    text elements found (e.g. words).
+
+    @param component_texts If provided the method will output a list of text strings for the
+    recognition of individual text elements found (e.g. words).
+
+    @param component_confidences If provided the method will output a list of confidence values
+    for the recognition of individual text elements found (e.g. words).
+
+    @param component_level Only OCR_LEVEL_WORD is supported.
+    */
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0);
+
+    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0);
+
+    // aliases for scripting
+    CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
+
+    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
+
+    /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes the beam search decoder.
+
+    @param classifier The character classifier with built-in feature extractor.
+
+    @param vocabulary The language vocabulary (chars when ASCII English text). vocabulary.size()
+    must be equal to the number of classes of the classifier.
+
+    @param transition_probabilities_table Table with transition probabilities between character
+    pairs. cols == rows == vocabulary.size().
+
+    @param emission_probabilities_table Table with observation emission probabilities. cols ==
+    rows == vocabulary.size().
+
+    @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment
+    (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
+
+    @param beam_size Size of the beam in the Beam Search algorithm.
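+
+    A usage sketch (the vocabulary and file name are placeholders; the vocabulary must match the
+    classes of the loaded classifier, and the tables can be built as in the OCRHMMDecoder example):
+    @code{.cpp}
+        std::string vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+        std::vector<std::string> lexicon;
+        lexicon.push_back("opencv");
+        Mat transition_p;
+        createOCRHMMTransitionsTable(vocabulary, lexicon, transition_p);
+        Mat emission_p = Mat::eye((int)vocabulary.size(), (int)vocabulary.size(), CV_64FC1);
+
+        Ptr<OCRBeamSearchDecoder> ocr = OCRBeamSearchDecoder::create(
+                loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
+                vocabulary, transition_p, emission_p);   // default mode and beam size
+
+        Mat word_image = imread("word.png", IMREAD_GRAYSCALE);  // single word, assumed binarized
+        std::string output;
+        ocr->run(word_image, output);
+    @endcode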
+    */
+    static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier, // The character classifier with built-in feature extractor
+                                            const std::string& vocabulary,              // The language vocabulary (chars when ASCII English text)
+                                                                                        // size() must be equal to the number of classes
+                                            InputArray transition_probabilities_table,  // Table with transition probabilities between character pairs
+                                                                                        // cols == rows == vocabulary.size()
+                                            InputArray emission_probabilities_table,    // Table with observation emission probabilities
+                                                                                        // cols == rows == vocabulary.size()
+                                            decoder_mode mode = OCR_DECODER_VITERBI,    // HMM Decoding algorithm (only Viterbi for the moment)
+                                            int beam_size = 500);                       // Size of the beam in the Beam Search algorithm
+
+    CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier, // The character classifier with built-in feature extractor
+                                            const String& vocabulary,                   // The language vocabulary (chars when ASCII English text)
+                                                                                        // size() must be equal to the number of classes
+                                            InputArray transition_probabilities_table,  // Table with transition probabilities between character pairs
+                                                                                        // cols == rows == vocabulary.size()
+                                            InputArray emission_probabilities_table,    // Table with observation emission probabilities
+                                                                                        // cols == rows == vocabulary.size()
+                                            int mode = OCR_DECODER_VITERBI,             // HMM Decoding algorithm (only Viterbi for the moment)
+                                            int beam_size = 500);                       // Size of the beam in the Beam Search algorithm
+
+protected:
+
+    Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
+    std::string vocabulary;
+    Mat transition_p;
+    Mat emission_p;
+    decoder_mode mode;
+    int beam_size;
+};
+
+/** @brief Allows the default character classifier to be loaded implicitly when creating an OCRBeamSearchDecoder object.
+
+@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)
+
+The CNN default classifier is based on the scene text recognition method proposed by Adam Coates &
+Andrew Ng in [Coates11a]. The character classifier consists of a single-layer convolutional neural
+network and a linear classifier. It is applied to the input image in a sliding window fashion,
+providing a set of recognitions at each window location.
+ */
+
+CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
+
+//! @}
+
+}
+}
+#endif // __OPENCV_TEXT_OCR_HPP__