/*************************************************** Author : Rohit Suri TODO : Extract confidences for each character : Calculate Position of words ***************************************************/ #include #include #include "opencv2/core/core.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/opencv.hpp" #include "opencv2/imgproc/imgproc.hpp" #include #include using namespace cv; using namespace std; using namespace tesseract; extern "C" { #include "api_scilab.h" #include "Scierror.h" #include "BOOL.h" #include #include "sciprint.h" #include "../common.h" /*Calling syntax: ocr(I) */ int opencv_ocr(char *fname, unsigned long fname_len) { // Error management variables SciErr sciErr; //------Local variables------// Mat sourceImage; TessBaseAPI tesseract; int *wordConfidences = NULL; int wordCount = 0, characterCount = 0, count=0, coordinate; string word=""; char *text = NULL; char **words = NULL; char *boxInformation = NULL; int *characterBoundingBoxes = NULL; int characterBoundingBoxesPos = 0; //------Check number of parameters------// CheckInputArgument(pvApiCtx, 1, 1); CheckOutputArgument(pvApiCtx, 1, 4); //------Get input arguments------// retrieveImage(sourceImage, 1); //------Actual processing------// tesseract.Init(NULL, "eng", OEM_TESSERACT_ONLY); tesseract.SetPageSegMode(PSM_SINGLE_BLOCK); tesseract.SetVariable("tessedit_char_whitelist","0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!@#$%^&*()-_=+[]{}:'\";\|,.<>/"); tesseract.SetImage(sourceImage.data, sourceImage.cols, sourceImage.rows, sourceImage.elemSize(), sourceImage.step); wordConfidences = tesseract.AllWordConfidences(); boxInformation = tesseract.GetBoxText(0); for(int iter = 0; ;iter++) { if(wordConfidences[iter]!=-1) { wordCount++; } else { break; } } words = (char**) malloc(sizeof(char*)*wordCount); text = tesseract.GetUTF8Text(); for(int iter = 0; count < wordCount; iter++) { if(count == wordCount) { break; } if(text[iter]==' ' || text[iter]=='\n') { words[count] = (char*) malloc(sizeof(char)*word.length()+1); for(int char_iter=0; char_iter