=== modified file 'cuneiform_src/Kern/cuneiform-cli.cpp' --- cuneiform_src/Kern/cuneiform-cli.cpp 2008-08-29 14:45:50 +0000 +++ cuneiform_src/Kern/cuneiform-cli.cpp 2008-09-06 04:17:02 +0000 @@ -26,7 +26,7 @@ */ /* This is a simple command line program for the Puma library. */ - +#include #include"ctiimage.h" // Must be first, or else you get compile errors. #include @@ -40,7 +40,13 @@ #include"config.h" using namespace std; - +template //use this to prevent use full boost library +std::string toString(T val) +{ + std::ostringstream oss; + oss<< val; + return oss.str(); +} struct langlist { int puma_number; const char *name; @@ -97,6 +103,7 @@ {-1, NULL} }; +#define MIN_RESOLUTION 300 static string supported_languages() { ostringstream os; @@ -119,34 +126,77 @@ * Read file and return it as a BMP DIB entity. On failure write an error * and return NULL. Caller delete[]'s the returned result. */ -static char* read_file(const char *fname); + +static vector read_file(const char *fname); #ifdef USE_MAGICK #include using namespace Magick; -static char* read_file(const char *fname) { - Blob blob; - size_t data_size; - char *dib; +static vector read_file(const char *fname) { + vector vector_blob; + Blob temp_blob; + vector data_size; + size_t temp_data_size; + vectordib; + char * temp_dib; + int number_of_page; + Image image; + Image notext( "100x100", "white" ); + notext.draw(DrawableText(0,0,"Image without text.")); try { - Image image(fname); - // Write to BLOB in BMP format - image.write(&blob, "DIB"); + vector imageVector; + cout << "Create imageVector\n"; + readImages( &imageVector, fname); + cout << "read image to imageVector\n"; + number_of_page=imageVector.size(); + cout << "Number of page = " << number_of_page << "\n"; + for (int i=0;i<=number_of_page-1;i++) + { + cout << i << "\n"; + image=imageVector[i]; + if (image.xResolution()>=MIN_RESOLUTION) + { + // Write to vector of BLOB in BMP format + cout << "Page number " << i+1 << " have normal image resolution. Add it.\n"; + image.write(&temp_blob, "DIB"); + vector_blob.push_back(temp_blob); + } + else{ + notext.write(&temp_blob, "DIB"); + vector_blob.push_back(temp_blob); + cout << "Page number " << i+1 << " have small image resolution. Skipping it.\n"; + } + + } } catch(Exception &error_) { cerr << error_.what() << "\n"; - return NULL; + temp_dib=NULL; + dib.push_back(temp_dib); + return dib; } - data_size = blob.length(); - dib = new char[data_size]; - memcpy(dib, blob.data(), data_size); + //image.write( "x.bmp" ); + cout << "We write vector_blob \n"; + for (int i=0;i<=number_of_page-1;i++) + { + cout << "Page number "<< i+1 << ".\n"; + temp_data_size = vector_blob[i].length(); + data_size.push_back(temp_data_size); + cout << "We write date in the vector of data_size \n"; + temp_dib = new char [data_size[i]]; + dib.push_back(temp_dib); + cout << "We create vector of dib \n"; + memcpy(dib[i], vector_blob[i].data(), data_size[i]); + cout << "We make memcpy \n"; + } + //delete temp_dib; return dib; } - -#else // No ImageMagick++ - -static char* read_file(const char *fname) { - char bmpheader[2]; +#else // No ImageMagick++, no multipage format support + +static vector read_file(const char *fname) { + vector vector_of_pages; + char bmpheader[2]; char *dib; FILE *f; int32_t dibsize, offset; @@ -182,19 +232,21 @@ cerr << "Please convert your BMP to uncompressed V3 format and try again."; return NULL; } - return dib; + vector_of_pages.push_back(dib); + return vector_of_pages; } #endif // USE_MAGICK int main(int argc, char **argv) { - char *dib; + vector dib; const char *infilename = NULL; - Word32 langcode = PUMA_LANG_ENGLISH; // By default recognize plain english text. + int langcode = PUMA_LANG_ENGLISH; // By default recognize plain english text. Bool32 dotmatrix = FALSE; Bool32 fax = FALSE; - const char *defaultnamestem = "cuneiform-out."; - string outfilename; + const char *defaultnamestem = "cuneiform-out"; + string outfilename,outfilename_ext,t_outfilename; + std::string s; Int32 outputformat = PUMA_TOTEXT; cout << "Cuneiform for Linux " << CF_VERSION << "\n"; @@ -256,42 +308,48 @@ if (outfilename.empty()) { outfilename = defaultnamestem; + }else + { + int pos=outfilename.find("."); + if (pos!=string::npos) + { + outfilename.erase(pos);//filename with extension. Ignore it. + } + } switch (outputformat) { case PUMA_TOHTML: - outfilename += "html"; + outfilename_ext = ".html"; break; case PUMA_TORTF: - outfilename += "rtf"; + outfilename_ext = ".rtf"; break; case PUMA_TOTEXT: case PUMA_TOSMARTTEXT: case PUMA_TOTABLETXT: - outfilename += "txt"; + outfilename_ext = ".txt"; break; case PUMA_TOEDNATIVE: - outfilename += "cf"; + outfilename_ext = ".cf"; break; case PUMA_TOTABLEDBF: - outfilename += "dbf"; + outfilename_ext = ".dbf"; break; default: - outfilename += "buginprogram"; + outfilename_ext = ".buginprogram"; break; - } - } + } if(infilename == NULL) { cout << "Usage: " << argv[0] << "[-l languagename -f format --dotmatrix --fax -o result_file] imagefile\n"; return 0; } - dib = read_file(infilename); - if(!dib) // Error msg is already printed so just get out. + if(!dib[0]) // Error msg is already printed so just get out. return 1; if(!PUMA_Init(0, 0)) { @@ -303,8 +361,12 @@ PUMA_SetImportData(PUMA_Word32_Language, &langcode); PUMA_SetImportData(PUMA_Bool32_DotMatrix, &dotmatrix); PUMA_SetImportData(PUMA_Bool32_Fax100, &fax); - - if(!PUMA_XOpen(dib, "none.txt")) { + int number_of_pages; + number_of_pages=dib.size(); + cout <<"Number of PAGE=" << number_of_pages << "\n"; + for (int i=0;i<=number_of_pages-1;i++) + { + if(!PUMA_XOpen(dib[i], "none.txt")) { cerr << "PUMA_Xopen failed.\n"; return 1; } @@ -347,22 +409,25 @@ cerr << "PUMA_XFinalrecognition failed.\n"; return 1; } - - if(!PUMA_XSave(outfilename.c_str(), outputformat, 0)) { + t_outfilename=outfilename; + t_outfilename+="_page_"; + s=toString(i+1); + t_outfilename+=s; + t_outfilename+=outfilename_ext; + if(!PUMA_XSave(t_outfilename.c_str(), outputformat, PUMA_CODE_UTF8)) { cerr << "PUMA_XSave failed.\n"; return 1; } - + t_outfilename=""; if(!PUMA_XClose()) { cerr << "PUMA_XClose failed.\n"; return 1; } - + } if(!PUMA_Done()) { cerr << "PUMA_Done failed.\n"; return 1; } - - delete []dib; +// delete []dib; return 0; }