diff options
Diffstat (limited to 'thirdparty/linux/include/opencv2/text')
-rw-r--r-- | thirdparty/linux/include/opencv2/text/erfilter.hpp | 360 | ||||
-rw-r--r-- | thirdparty/linux/include/opencv2/text/ocr.hpp | 470 |
2 files changed, 830 insertions, 0 deletions
diff --git a/thirdparty/linux/include/opencv2/text/erfilter.hpp b/thirdparty/linux/include/opencv2/text/erfilter.hpp new file mode 100644 index 0000000..af983c6 --- /dev/null +++ b/thirdparty/linux/include/opencv2/text/erfilter.hpp @@ -0,0 +1,360 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_TEXT_ERFILTER_HPP__ +#define __OPENCV_TEXT_ERFILTER_HPP__ + +#include "opencv2/core.hpp" +#include <vector> +#include <deque> +#include <string> + +namespace cv +{ +namespace text +{ + +//! @addtogroup text_detect +//! @{ + +/** @brief The ERStat structure represents a class-specific Extremal Region (ER). + +An ER is a 4-connected set of pixels with all its grey-level values smaller than the values in its +outer boundary. A class-specific ER is selected (using a classifier) from all the ER's in the +component tree of the image. : + */ +struct CV_EXPORTS ERStat +{ +public: + //! Constructor + explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0); + //! Destructor + ~ERStat() { } + + //! seed point and the threshold (max grey-level value) + int pixel; + int level; + + //! incrementally computable features + int area; + int perimeter; + int euler; //!< euler number + Rect rect; + double raw_moments[2]; //!< order 1 raw moments to derive the centroid + double central_moments[3]; //!< order 2 central moments to construct the covariance matrix + Ptr<std::deque<int> > crossings;//!< horizontal crossings + float med_crossings; //!< median of the crossings at three different height levels + + //! 
2nd stage features + float hole_area_ratio; + float convex_hull_ratio; + float num_inflexion_points; + + // TODO Other features can be added (average color, standard deviation, and such) + + + // TODO shall we include the pixel list whenever available (i.e. after 2nd stage) ? + std::vector<int> *pixels; + + //! probability that the ER belongs to the class we are looking for + double probability; + + //! pointers preserving the tree structure of the component tree + ERStat* parent; + ERStat* child; + ERStat* next; + ERStat* prev; + + //! wenever the regions is a local maxima of the probability + bool local_maxima; + ERStat* max_probability_ancestor; + ERStat* min_probability_ancestor; +}; + +/** @brief Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithm [Neumann12]. : + +Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier. + */ +class CV_EXPORTS_W ERFilter : public Algorithm +{ +public: + + /** @brief Callback with the classifier is made a class. + + By doing it we hide SVM, Boost etc. Developers can provide their own classifiers to the + ERFilter algorithm. + */ + class CV_EXPORTS_W Callback + { + public: + virtual ~Callback() { } + /** @brief The classifier must return probability measure for the region. + + @param stat : The region to be classified + */ + virtual double eval(const ERStat& stat) = 0; //const = 0; //TODO why cannot use const = 0 here? + }; + + /** @brief The key method of ERFilter algorithm. + + Takes image on input and returns the selected regions in a vector of ERStat only distinctive + ERs which correspond to characters are selected by a sequential classifier + + @param image Single channel image CV_8UC1 + + @param regions Output for the 1st stage and Input/Output for the 2nd. The selected Extremal Regions + are stored here. + + Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given + classifier. 
+ */ + virtual void run( InputArray image, std::vector<ERStat>& regions ) = 0; + + + //! set/get methods to set the algorithm properties, + virtual void setCallback(const Ptr<ERFilter::Callback>& cb) = 0; + virtual void setThresholdDelta(int thresholdDelta) = 0; + virtual void setMinArea(float minArea) = 0; + virtual void setMaxArea(float maxArea) = 0; + virtual void setMinProbability(float minProbability) = 0; + virtual void setMinProbabilityDiff(float minProbabilityDiff) = 0; + virtual void setNonMaxSuppression(bool nonMaxSuppression) = 0; + virtual int getNumRejected() = 0; +}; + + +/*! + Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm + Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012 + + The component tree of the image is extracted by a threshold increased step by step + from 0 to 255, incrementally computable descriptors (aspect_ratio, compactness, + number of holes, and number of horizontal crossings) are computed for each ER + and used as features for a classifier which estimates the class-conditional + probability P(er|character). The value of P(er|character) is tracked using the inclusion + relation of ER across all thresholds and only the ERs which correspond to local maximum + of the probability P(er|character) are selected (if the local maximum of the + probability is above a global limit pmin and the difference between local maximum and + local minimum is greater than minProbabilityDiff). + + @param cb – Callback with the classifier. Default classifier can be implicitly load with function + loadClassifierNM1(), e.g. 
from file in samples/cpp/trained_classifierNM1.xml + @param thresholdDelta – Threshold step in subsequent thresholds when extracting the component tree + @param minArea – The minimum area (% of image size) allowed for retrieved ER’s + @param maxArea – The maximum area (% of image size) allowed for retrieved ER’s + @param minProbability – The minimum probability P(er|character) allowed for retrieved ER’s + @param nonMaxSuppression – Whenever non-maximum suppression is done over the branch probabilities + @param minProbabilityDiff – The minimum probability difference between local maxima and local minima ERs +*/ + +/** @brief Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm [Neumann12]. + +@param cb : Callback with the classifier. Default classifier can be implicitly loaded with function +loadClassifierNM1, e.g. from file in samples/cpp/trained_classifierNM1.xml +@param thresholdDelta : Threshold step in subsequent thresholds when extracting the component tree +@param minArea : The minimum area (% of image size) allowed for retrieved ER's +@param maxArea : The maximum area (% of image size) allowed for retrieved ER's +@param minProbability : The minimum probability P(er|character) allowed for retrieved ER's +@param nonMaxSuppression : Whenever non-maximum suppression is done over the branch probabilities +@param minProbabilityDiff : The minimum probability difference between local maxima and local minima ERs + +The component tree of the image is extracted by a threshold increased step by step from 0 to 255, +incrementally computable descriptors (aspect_ratio, compactness, number of holes, and number of +horizontal crossings) are computed for each ER and used as features for a classifier which estimates +the class-conditional probability P(er|character). 
The value of P(er|character) is tracked using the +inclusion relation of ER across all thresholds and only the ERs which correspond to local maximum of +the probability P(er|character) are selected (if the local maximum of the probability is above a +global limit pmin and the difference between local maximum and local minimum is greater than +minProbabilityDiff). + */ +CV_EXPORTS_W Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb, + int thresholdDelta = 1, float minArea = (float)0.00025, + float maxArea = (float)0.13, float minProbability = (float)0.4, + bool nonMaxSuppression = true, + float minProbabilityDiff = (float)0.1); + +/** @brief Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12]. + +@param cb : Callback with the classifier. Default classifier can be implicitly load with function +loadClassifierNM2, e.g. from file in samples/cpp/trained_classifierNM2.xml +@param minProbability : The minimum probability P(er|character) allowed for retreived ER's + +In the second stage, the ERs that passed the first stage are classified into character and +non-character classes using more informative but also more computationally expensive features. The +classifier uses all the features calculated in the first stage and the following additional +features: hole area ratio, convex hull ratio, and number of outer inflexion points. + */ +CV_EXPORTS_W Ptr<ERFilter> createERFilterNM2(const Ptr<ERFilter::Callback>& cb, + float minProbability = (float)0.3); + + +/** @brief Allow to implicitly load the default classifier when creating an ERFilter object. + +@param filename The XML or YAML file with the classifier model (e.g. trained_classifierNM1.xml) + +returns a pointer to ERFilter::Callback. + */ +CV_EXPORTS_W Ptr<ERFilter::Callback> loadClassifierNM1(const String& filename); + +/** @brief Allow to implicitly load the default classifier when creating an ERFilter object. 
+ +@param filename The XML or YAML file with the classifier model (e.g. trained_classifierNM2.xml) + +returns a pointer to ERFilter::Callback. + */ +CV_EXPORTS_W Ptr<ERFilter::Callback> loadClassifierNM2(const String& filename); + + +//! computeNMChannels operation modes +enum { ERFILTER_NM_RGBLGrad, + ERFILTER_NM_IHSGrad + }; + +/** @brief Compute the different channels to be processed independently in the N&M algorithm [Neumann12]. + +@param _src Source image. Must be RGB CV_8UC3. + +@param _channels Output vector\<Mat\> where computed channels are stored. + +@param _mode Mode of operation. Currently the only available options are: +**ERFILTER_NM_RGBLGrad** (used by default) and **ERFILTER_NM_IHSGrad**. + +In N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient magnitude +channels (Grad) are used in order to obtain high localization recall. This implementation also +provides an alternative combination of red (R), green (G), blue (B), lightness (L), and gradient +magnitude (Grad). + */ +CV_EXPORTS_W void computeNMChannels(InputArray _src, CV_OUT OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad); + + + +//! text::erGrouping operation modes +enum erGrouping_Modes { + + /** Exhaustive Search algorithm proposed in [Neumann11] for grouping horizontally aligned text. + The algorithm models a verification function for all the possible ER sequences. The + verification fuction for ER pairs consists in a set of threshold-based pairwise rules which + compare measurements of two regions (height ratio, centroid angle, and region distance). The + verification function for ER triplets creates a word text line estimate using Least + Median-Squares fitting for a given triplet and then verifies that the estimate is valid (based + on thresholds created during training). Verification functions for sequences larger than 3 are + approximated by verifying that the text line parameters of all (sub)sequences of length 3 are + consistent. 
+ */ + ERGROUPING_ORIENTATION_HORIZ, + /** Text grouping method proposed in [Gomez13][Gomez14] for grouping arbitrary oriented text. Regions + are agglomerated by Single Linkage Clustering in a weighted feature space that combines proximity + (x,y coordinates) and similarity measures (color, size, gradient magnitude, stroke width, etc.). + SLC provides a dendrogram where each node represents a text group hypothesis. Then the algorithm + finds the branches corresponding to text groups by traversing this dendrogram with a stopping rule + that combines the output of a rotation invariant text group classifier and a probabilistic measure + for hierarchical clustering validity assessment. + */ + ERGROUPING_ORIENTATION_ANY +}; + +/** @brief Find groups of Extremal Regions that are organized as text blocks. + +@param img Original RGB or Greyscale image from wich the regions were extracted. + +@param channels Vector of single channel images CV_8UC1 from wich the regions were extracted. + +@param regions Vector of ER's retreived from the ERFilter algorithm from each channel. + +@param groups The output of the algorithm is stored in this parameter as set of lists of indexes to +provided regions. + +@param groups_rects The output of the algorithm are stored in this parameter as list of rectangles. + +@param method Grouping method (see text::erGrouping_Modes). Can be one of ERGROUPING_ORIENTATION_HORIZ, +ERGROUPING_ORIENTATION_ANY. + +@param filename The XML or YAML file with the classifier model (e.g. +samples/trained_classifier_erGrouping.xml). Only to use when grouping method is +ERGROUPING_ORIENTATION_ANY. + +@param minProbablity The minimum probability for accepting a group. Only to use when grouping +method is ERGROUPING_ORIENTATION_ANY. 
+ */ +CV_EXPORTS void erGrouping(InputArray img, InputArrayOfArrays channels, + std::vector<std::vector<ERStat> > ®ions, + std::vector<std::vector<Vec2i> > &groups, + std::vector<Rect> &groups_rects, + int method = ERGROUPING_ORIENTATION_HORIZ, + const std::string& filename = std::string(), + float minProbablity = 0.5); + +CV_EXPORTS_W void erGrouping(InputArray image, InputArray channel, + std::vector<std::vector<Point> > regions, + CV_OUT std::vector<Rect> &groups_rects, + int method = ERGROUPING_ORIENTATION_HORIZ, + const String& filename = String(), + float minProbablity = (float)0.5); + +/** @brief Converts MSER contours (vector\<Point\>) to ERStat regions. + +@param image Source image CV_8UC1 from which the MSERs where extracted. + +@param contours Intput vector with all the contours (vector\<Point\>). + +@param regions Output where the ERStat regions are stored. + +It takes as input the contours provided by the OpenCV MSER feature detector and returns as output +two vectors of ERStats. This is because MSER() output contains both MSER+ and MSER- regions in a +single vector\<Point\>, the function separates them in two different vectors (this is as if the +ERStats where extracted from two different channels). + +An example of MSERsToERStats in use can be found in the text detection webcam_demo: +<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp> + */ +CV_EXPORTS void MSERsToERStats(InputArray image, std::vector<std::vector<Point> > &contours, + std::vector<std::vector<ERStat> > ®ions); + +// Utility funtion for scripting +CV_EXPORTS_W void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2, CV_OUT std::vector< std::vector<Point> >& regions); + +//! 
@} + +} +} +#endif // __OPENCV_TEXT_ERFILTER_HPP__ diff --git a/thirdparty/linux/include/opencv2/text/ocr.hpp b/thirdparty/linux/include/opencv2/text/ocr.hpp new file mode 100644 index 0000000..1261046 --- /dev/null +++ b/thirdparty/linux/include/opencv2/text/ocr.hpp @@ -0,0 +1,470 @@ +/*M////////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_TEXT_OCR_HPP__ +#define __OPENCV_TEXT_OCR_HPP__ + +#include <vector> +#include <string> + +namespace cv +{ +namespace text +{ + +//! @addtogroup text_recognize +//! @{ + +enum +{ + OCR_LEVEL_WORD, + OCR_LEVEL_TEXTLINE +}; + +//base class BaseOCR declares a common API that would be used in a typical text recognition scenario +class CV_EXPORTS_W BaseOCR +{ +public: + virtual ~BaseOCR() {}; + virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL, + std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, + int component_level=0) = 0; + virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL, + std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, + int component_level=0) = 0; +}; + +/** @brief OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++. + +Notice that it is compiled only when tesseract-ocr is correctly installed. 
+ +@note + - (C++) An example of OCRTesseract recognition combined with scene text detection can be found + at the end_to_end_recognition demo: + <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp> + - (C++) Another example of OCRTesseract recognition combined with scene text detection can be + found at the webcam_demo: + <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp> + */ +class CV_EXPORTS_W OCRTesseract : public BaseOCR +{ +public: + /** @brief Recognize text using the tesseract-ocr API. + + Takes image on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. + + @param image Input image CV_8UC1 or CV_8UC3 + @param output_text Output text of the tesseract-ocr. + @param component_rects If provided the method will output a list of Rects for the individual + text elements found (e.g. words or text lines). + @param component_texts If provided the method will output a list of text strings for the + recognition of individual text elements found (e.g. words or text lines). + @param component_confidences If provided the method will output a list of confidence values + for the recognition of individual text elements found (e.g. words or text lines). + @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXT_LINE. 
+ */ + virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL, + std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, + int component_level=0); + + virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL, + std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, + int component_level=0); + + // aliases for scripting + CV_WRAP String run(InputArray image, int min_confidence, int component_level=0); + + CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0); + + CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0; + + + /** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract. + + @param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the + system's default directory. + @param language an ISO 639-3 code or NULL will default to "eng". + @param char_whitelist specifies the list of characters used for recognition. NULL defaults to + "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ". + @param oem tesseract-ocr offers different OCR Engine Modes (OEM), by deffault + tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible + values. + @param psmode tesseract-ocr offers different Page Segmentation Modes (PSM) tesseract::PSM_AUTO + (fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other + possible values. + */ + CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL, + const char* char_whitelist=NULL, int oem=3, int psmode=3); +}; + + +/* OCR HMM Decoder */ + +enum decoder_mode +{ + OCR_DECODER_VITERBI = 0 // Other algorithms may be added +}; + +/** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models. 
+ +@note + - (C++) An example on using OCRHMMDecoder recognition combined with scene text detection can + be found at the webcam_demo sample: + <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp> + */ +class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR +{ +public: + + /** @brief Callback with the character classifier is made a class. + + This way it hides the feature extractor and the classifier itself, so developers can write + their own OCR code. + + The default character classifier and feature extractor can be loaded using the utility funtion + loadOCRHMMClassifierNM and KNN model provided in + <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>. + */ + class CV_EXPORTS_W ClassifierCallback + { + public: + virtual ~ClassifierCallback() { } + /** @brief The character classifier must return a (ranked list of) class(es) id('s) + + @param image Input image CV_8UC1 or CV_8UC3 with a single letter. + @param out_class The classifier returns the character class categorical label, or list of + class labels, to which the input image corresponds. + @param out_confidence The classifier returns the probability of the input image + corresponding to each classes in out_class. + */ + virtual void eval( InputArray image, std::vector<int>& out_class, std::vector<double>& out_confidence); + }; + +public: + /** @brief Recognize text using HMM. + + Takes binary image on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. + + @param image Input binary image CV_8UC1 with a single text line (or word). + + @param output_text Output text. Most likely character sequence found by the HMM decoder. + + @param component_rects If provided the method will output a list of Rects for the individual + text elements found (e.g. words). 
+ + @param component_texts If provided the method will output a list of text strings for the + recognition of individual text elements found (e.g. words). + + @param component_confidences If provided the method will output a list of confidence values + for the recognition of individual text elements found (e.g. words). + + @param component_level Only OCR_LEVEL_WORD is supported. + */ + virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL, + std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, + int component_level=0); + + /** @brief Recognize text using HMM. + + Takes an image and a mask (where each connected component corresponds to a segmented character) + on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. + + @param image Input image CV_8UC1 or CV_8UC3 with a single text line (or word). + @param mask Input binary image CV_8UC1 same size as input image. Each connected component in mask corresponds to a segmented character in the input image. + + @param output_text Output text. Most likely character sequence found by the HMM decoder. + + @param component_rects If provided the method will output a list of Rects for the individual + text elements found (e.g. words). + + @param component_texts If provided the method will output a list of text strings for the + recognition of individual text elements found (e.g. words). + + @param component_confidences If provided the method will output a list of confidence values + for the recognition of individual text elements found (e.g. words). + + @param component_level Only OCR_LEVEL_WORD is supported. 
+ */ + virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL, + std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, + int component_level=0); + + // aliases for scripting + CV_WRAP String run(InputArray image, int min_confidence, int component_level=0); + + CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0); + + /** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder. + + @param classifier The character classifier with built in feature extractor. + + @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size() + must be equal to the number of classes of the classifier. + + @param transition_probabilities_table Table with transition probabilities between character + pairs. cols == rows == vocabulary.size(). + + @param emission_probabilities_table Table with observation emission probabilities. cols == + rows == vocabulary.size(). + + @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment + (<http://en.wikipedia.org/wiki/Viterbi_algorithm>). 
+ */ + static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor + const std::string& vocabulary, // The language vocabulary (chars when ascii english text) + // size() must be equal to the number of classes + InputArray transition_probabilities_table, // Table with transition probabilities between character pairs + // cols == rows == vocabulari.size() + InputArray emission_probabilities_table, // Table with observation emission probabilities + // cols == rows == vocabulari.size() + decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment) + + CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor + const String& vocabulary, // The language vocabulary (chars when ascii english text) + // size() must be equal to the number of classes + InputArray transition_probabilities_table, // Table with transition probabilities between character pairs + // cols == rows == vocabulari.size() + InputArray emission_probabilities_table, // Table with observation emission probabilities + // cols == rows == vocabulari.size() + int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment) + +protected: + + Ptr<OCRHMMDecoder::ClassifierCallback> classifier; + std::string vocabulary; + Mat transition_p; + Mat emission_p; + decoder_mode mode; +}; + +/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. + +@param filename The XML or YAML file with the classifier model (e.g. OCRHMM_knn_model_data.xml) + +The KNN default classifier is based in the scene text recognition method proposed by Lukás Neumann & +Jiri Matas in [Neumann11b]. 
Basically, the region (contour) in the input image is normalized to a +fixed size, while retaining the centroid and aspect ratio, in order to extract a feature vector +based on gradient orientations along the chain-code of its perimeter. Then, the region is classified +using a KNN model trained with synthetic data of rendered characters with different standard font +types. + */ + +CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename); + +/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. + +@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz) + +The CNN default classifier is based in the scene text recognition method proposed by Adam Coates & +Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Convolutional Neural Network and +a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions +at each window location. + */ +CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename); + +//! @} + +/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon). + * + * @param vocabulary The language vocabulary (chars when ascii english text). + * + * @param lexicon The list of words that are expected to be found in a particular image. + * + * @param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size(). + * + * The function calculate frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods. 
 * @note
 * - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
 * <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
 **/
CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);

/** @brief Overload for scripting bindings: returns the transitions table instead of filling an OutputArray. */
CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);


/* OCR BeamSearch Decoder */

/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam Search algorithm.

@note
   - (C++) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can
     be found at the demo sample:
     <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
 */
class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
{
public:

    /** @brief Callback with the character classifier is made a class.

    This way it hides the feature extractor and the classifier itself, so developers can write
    their own OCR code.

    The default character classifier and feature extractor can be loaded using the utility function
    loadOCRBeamSearchClassifierCNN with all its parameters provided in
    <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
    */
    class CV_EXPORTS_W ClassifierCallback
    {
    public:
        virtual ~ClassifierCallback() { }
        /** @brief The character classifier must return a (ranked list of) class(es) id('s)

        @param image Input image CV_8UC1 or CV_8UC3 with a single letter.
        @param recognition_probabilities For each of the N characters found the classifier returns a list with
        class probabilities for each class.
        @param oversegmentation The classifier returns a list of N+1 character locations' x-coordinates,
        including 0 as start-sequence location.
        */
        virtual void eval( InputArray image, std::vector< std::vector<double> >& recognition_probabilities, std::vector<int>& oversegmentation );

        // NOTE(review): these are NON-virtual stubs returning 0; a concrete classifier can only
        // hide (not override) them -- confirm against the implementation before relying on them.
        int getWindowSize() {return 0;}
        int getStepSize() {return 0;}
    };

public:
    /** @brief Recognize text using Beam Search.

    Takes image on input and returns recognized text in the output_text parameter. Optionally
    provides also the Rects for individual text elements found (e.g. words), and the list of those
    text elements with their confidence values.

    @param image Input binary image CV_8UC1 with a single text line (or word).

    @param output_text Output text. Most likely character sequence found by the HMM decoder.

    @param component_rects If provided the method will output a list of Rects for the individual
    text elements found (e.g. words).

    @param component_texts If provided the method will output a list of text strings for the
    recognition of individual text elements found (e.g. words).

    @param component_confidences If provided the method will output a list of confidence values
    for the recognition of individual text elements found (e.g. words).

    @param component_level Only OCR_LEVEL_WORD is supported.
    */
    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=0);

    /** @brief Same as above, with an additional mask argument restricting recognition to the masked region. */
    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=0);

    // aliases for scripting
    CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);

    /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder.

    @param classifier The character classifier with built in feature extractor.

    @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size()
    must be equal to the number of classes of the classifier.

    @param transition_probabilities_table Table with transition probabilities between character
    pairs. cols == rows == vocabulary.size().

    @param emission_probabilities_table Table with observation emission probabilities. cols ==
    rows == vocabulary.size().

    @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment
    (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).

    @param beam_size Size of the beam in Beam Search algorithm.
    */
    static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
                                            const std::string& vocabulary,             // The language vocabulary (chars when ascii english text)
                                                                                       // size() must be equal to the number of classes
                                            InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
                                                                                       // cols == rows == vocabulary.size()
                                            InputArray emission_probabilities_table,   // Table with observation emission probabilities
                                                                                       // cols == rows == vocabulary.size()
                                            decoder_mode mode = OCR_DECODER_VITERBI,   // HMM Decoding algorithm (only Viterbi for the moment)
                                            int beam_size = 500);                      // Size of the beam in Beam Search algorithm

    /** @brief Same as above, wrapped for the scripting bindings (decoding mode passed as plain int). */
    CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier, // The character classifier with built in feature extractor
                                            const String& vocabulary,                  // The language vocabulary (chars when ascii english text)
                                                                                       // size() must be equal to the number of classes
                                            InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
                                                                                       // cols == rows == vocabulary.size()
                                            InputArray emission_probabilities_table,   // Table with observation emission probabilities
                                                                                       // cols == rows == vocabulary.size()
                                            int mode = OCR_DECODER_VITERBI,            // HMM Decoding algorithm (only Viterbi for the moment)
                                            int beam_size = 500);                      // Size of the beam in Beam Search algorithm

protected:

    Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier; // character classifier with built-in feature extractor
    std::string vocabulary;                                   // one char per class; size() == number of classes
    Mat transition_p;                                         // character-pair transition probabilities
    Mat emission_p;                                           // observation emission probabilities
    decoder_mode mode;                                        // decoding algorithm (only OCR_DECODER_VITERBI)
    int beam_size;                                            // beam width used by the Beam Search
};

/** @brief Allow to implicitly load the default character classifier when creating an OCRBeamSearchDecoder object.

@param filename The XML or YAML file with the classifier model (e.g.
OCRBeamSearch_CNN_model_data.xml.gz)

The CNN default classifier is based on the scene text recognition method proposed by Adam Coates &
Andrew NG in [Coates11a]. The character classifier consists of a Single Layer Convolutional Neural Network and
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location.
 */

CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);

//! @}

}  // namespace text
}  // namespace cv
#endif // _OPENCV_TEXT_OCR_HPP_