00001
00002
00003
00004
00005
00006
00012
00013
00014
00015
00016
00017
00018
00019
#include "../config.h"
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "ParserEventGeneratorKit.h"
#include "libofx.h"
#include "messages.hh"
#include "ofx_sgml.hh"
#include "ofc_sgml.hh"
#include "ofx_preproc.hh"
#ifdef HAVE_ICONV
#include <iconv.h>
#endif
00035
00036 #define LIBOFX_DEFAULT_INPUT_ENCODING "CP1252"
00037 #define LIBOFX_DEFAULT_OUTPUT_ENCODING "UTF-8"
00038
00039 using namespace std;
/** Size in bytes of the fixed character buffers used in this file
    (see ofx_proc_file() and sanitize_proprietary_tags()). */
const unsigned int READ_BUFFER_SIZE = 1024;

/** Number of entries in DTD_SEARCH_PATH.  One more entry is present when the
    build system supplies MAKEFILE_DTD_PATH. */
#ifdef MAKEFILE_DTD_PATH
const int DTD_SEARCH_PATH_NUM = 4;
#else
const int DTD_SEARCH_PATH_NUM = 3;
#endif

/** Directory prefixes tried in order by find_dtd().  The file name is
    appended directly to each entry, which is why every entry ends with a
    slash. */
const char *DTD_SEARCH_PATH[DTD_SEARCH_PATH_NUM] =
{
#ifdef MAKEFILE_DTD_PATH
  MAKEFILE_DTD_PATH,
#endif
  "/usr/local/share/libofx/dtd/",
  "/usr/share/libofx/dtd/",
  "~/"
};
00060
00065 CFCT int ofx_proc_file(LibofxContextPtr ctx, const char * p_filename)
00066 {
00067 LibofxContext *libofx_context;
00068 bool ofx_start=false;
00069 bool ofx_end=false;
00070
00071 ifstream input_file;
00072 ofstream tmp_file;
00073 char buffer[READ_BUFFER_SIZE];
00074 char iconv_buffer[READ_BUFFER_SIZE];
00075 string s_buffer;
00076 char *filenames[3];
00077 char tmp_filename[50];
00078 #ifdef HAVE_ICONV
00079 iconv_t conversion_descriptor;
00080 #endif
00081 libofx_context=(LibofxContext*)ctx;
00082
00083 if(p_filename!=NULL&&strcmp(p_filename,"")!=0)
00084 {
00085 message_out(DEBUG, string("ofx_proc_file():Opening file: ")+ p_filename);
00086
00087 input_file.open(p_filename);
00088 strncpy(tmp_filename,"/tmp/libofxtmpXXXXXX",50);
00089 mkstemp(tmp_filename);
00090 tmp_file.open(tmp_filename);
00091
00092 message_out(DEBUG,"ofx_proc_file(): Creating temp file: "+string(tmp_filename));
00093 if(!input_file){
00094 message_out(ERROR,"ofx_proc_file():Unable to open the input file "+string(p_filename));
00095 }
00096 else if(!tmp_file){
00097 message_out(ERROR,"ofx_proc_file():Unable to open the output file "+string(tmp_filename));
00098 }
00099 else
00100 {
00101 int header_separator_idx;
00102 string header_name;
00103 string header_value;
00104 string ofx_encoding;
00105 string ofx_charset;
00106 do {
00107 input_file.getline(buffer, sizeof(buffer),'\n');
00108
00109 s_buffer.assign(buffer);
00110
00111 if(input_file.gcount()<(sizeof(buffer)-1))
00112 {
00113 s_buffer.append("\n");
00114 }
00115 else if( !input_file.eof()&&input_file.fail())
00116 {
00117 input_file.clear();
00118 }
00119 int ofx_start_idx;
00120 if (ofx_start==false &&
00121 (
00122 (libofx_context->currentFileType()==OFX&&
00123 ((ofx_start_idx=s_buffer.find("<OFX>"))!=
00124 string::npos||(ofx_start_idx=s_buffer.find("<ofx>"))!=string::npos))
00125 || (libofx_context->currentFileType()==OFC&&
00126 ((ofx_start_idx=s_buffer.find("<OFC>"))!=string::npos||
00127 (ofx_start_idx=s_buffer.find("<ofc>"))!=string::npos))
00128 )
00129 )
00130 {
00131 ofx_start=true;
00132 s_buffer.erase(0,ofx_start_idx);
00133 message_out(DEBUG,"ofx_proc_file():<OFX> or <OFC> has been found");
00134 #ifdef HAVE_ICONV
00135 string fromcode;
00136 string tocode;
00137 if(ofx_encoding.compare("USASCII")==0){
00138 if(ofx_charset.compare("ISO-8859-1")==0){
00139 fromcode="ISO-8859-1";
00140 }
00141 else if(ofx_charset.compare("1252")==0){
00142 fromcode="CP1252";
00143 }
00144 else if(ofx_charset.compare("NONE")==0){
00145 fromcode=LIBOFX_DEFAULT_INPUT_ENCODING;
00146 }
00147 }
00148 else if(ofx_encoding.compare("USASCII")==0) {
00149 fromcode="UTF-8";
00150 }
00151 else
00152 {
00153 fromcode=LIBOFX_DEFAULT_INPUT_ENCODING;
00154 }
00155 tocode = LIBOFX_DEFAULT_OUTPUT_ENCODING;
00156 message_out(DEBUG,"ofx_proc_file(): Setting up iconv for fromcode: "+fromcode+", tocode: "+tocode);
00157 conversion_descriptor = iconv_open (tocode.c_str(), fromcode.c_str());
00158 #endif
00159 }
00160 else {
00161
00162 if ((header_separator_idx=s_buffer.find(':')) != string::npos) {
00163
00164 header_name.assign(s_buffer.substr(0,header_separator_idx));
00165 header_value.assign(s_buffer.substr(header_separator_idx+1));
00166 message_out(DEBUG,"ofx_proc_file():Header: "+header_name+" with value: "+header_value+" has been found");
00167 if(header_name.compare("ENCODING")==0) {
00168 ofx_encoding.assign(header_value);
00169 }
00170 if(header_name.compare("CHARSET")==0) {
00171 ofx_charset.assign(header_value);
00172 }
00173 }
00174 }
00175
00176 if(ofx_start==true && ofx_end==false){
00177 s_buffer=sanitize_proprietary_tags(s_buffer);
00178
00179 #ifdef HAVE_ICONV
00180 memset(iconv_buffer,0,READ_BUFFER_SIZE);
00181 size_t inbytesleft = strlen(s_buffer.c_str());
00182 size_t outbytesleft = READ_BUFFER_SIZE;
00183 char * inchar = (char *)s_buffer.c_str();
00184 char * outchar = iconv_buffer;
00185 int iconv_retval = iconv (conversion_descriptor,
00186 &inchar, &inbytesleft,
00187 &outchar, &outbytesleft);
00188 if(iconv_retval==-1){
00189 message_out(ERROR,"ofx_proc_file(): Conversion error");
00190 }
00191 s_buffer = iconv_buffer;
00192 #endif
00193 tmp_file.write(s_buffer.c_str(), s_buffer.length());
00194 }
00195
00196 if (ofx_start==true &&
00197 (
00198 (libofx_context->currentFileType()==OFX &&
00199 ((ofx_start_idx=s_buffer.find("</OFX>"))!=string::npos ||
00200 (ofx_start_idx=s_buffer.find("</ofx>"))!=string::npos))
00201 || (libofx_context->currentFileType()==OFC &&
00202 ((ofx_start_idx=s_buffer.find("</OFC>"))!=string::npos ||
00203 (ofx_start_idx=s_buffer.find("</ofc>"))!=string::npos))
00204 )
00205 )
00206 {
00207 ofx_end=true;
00208 message_out(DEBUG,"ofx_proc_file():</OFX> or </OFC> has been found");
00209 }
00210
00211 } while(!input_file.eof()&&!input_file.bad());
00212 }
00213 input_file.close();
00214 tmp_file.close();
00215 #ifdef HAVE_ICONV
00216 iconv_close(conversion_descriptor);
00217 #endif
00218 char filename_openspdtd[255];
00219 char filename_dtd[255];
00220 char filename_ofx[255];
00221 strncpy(filename_openspdtd,find_dtd(OPENSPDCL_FILENAME).c_str(),255);
00222 if(libofx_context->currentFileType()==OFX)
00223 {
00224 strncpy(filename_dtd,find_dtd(OFX160DTD_FILENAME).c_str(),255);
00225 }
00226 else if(libofx_context->currentFileType()==OFC)
00227 {
00228 strncpy(filename_dtd,find_dtd(OFCDTD_FILENAME).c_str(),255);
00229 }
00230 else
00231 {
00232 message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00233 }
00234
00235 if((string)filename_dtd!="" && (string)filename_openspdtd!="")
00236 {
00237 strncpy(filename_ofx,tmp_filename,255);
00238 filenames[0]=filename_openspdtd;
00239 filenames[1]=filename_dtd;
00240 filenames[2]=filename_ofx;
00241 if(libofx_context->currentFileType()==OFX)
00242 {
00243 ofx_proc_sgml(libofx_context, 3,filenames);
00244 }
00245 else if(libofx_context->currentFileType()==OFC)
00246 {
00247 ofc_proc_sgml(libofx_context, 3,filenames);
00248 }
00249 else
00250 {
00251 message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00252 }
00253 if(remove(tmp_filename)!=0)
00254 {
00255 message_out(ERROR,"ofx_proc_file(): Error deleting temporary file "+string(tmp_filename));
00256 }
00257 }
00258 else
00259 {
00260 message_out(ERROR,"ofx_proc_file(): FATAL: Missing DTD, aborting");
00261 }
00262 }
00263 else{
00264 message_out(ERROR,"ofx_proc_file():No input file specified");
00265 }
00266 return 0;
00267 }
00268
00269
00270
00271 CFCT int libofx_proc_buffer(LibofxContextPtr ctx,
00272 const char *s, unsigned int size){
00273 ofstream tmp_file;
00274 string s_buffer;
00275 char *filenames[3];
00276 char tmp_filename[50];
00277 int pos;
00278 LibofxContext *libofx_context;
00279
00280 libofx_context=(LibofxContext*)ctx;
00281
00282 if (size==0) {
00283 message_out(ERROR,
00284 "ofx_proc_file(): bad size");
00285 return -1;
00286 }
00287 s_buffer=string(s, size);
00288
00289 strncpy(tmp_filename,"/tmp/libofxtmpXXXXXX",50);
00290 mkstemp(tmp_filename);
00291 tmp_file.open(tmp_filename);
00292
00293 message_out(DEBUG,"ofx_proc_file(): Creating temp file: "+string(tmp_filename));
00294 if(!tmp_file){
00295 message_out(ERROR,"ofx_proc_file():Unable to open the output file "+string(tmp_filename));
00296 return -1;
00297 }
00298
00299 if (libofx_context->currentFileType()==OFX) {
00300 pos=s_buffer.find("<OFX>");
00301 if (pos==string::npos)
00302 pos=s_buffer.find("<ofx>");
00303 }
00304 else if (libofx_context->currentFileType()==OFC) {
00305 pos=s_buffer.find("<OFC>");
00306 if (pos==string::npos)
00307 pos=s_buffer.find("<ofc>");
00308 }
00309 else {
00310 message_out(ERROR,"ofx_proc(): unknown file type");
00311 return -1;
00312 }
00313 if (pos==string::npos || pos > s_buffer.size()) {
00314 message_out(ERROR,"ofx_proc():<OFX> has not been found");
00315 return -1;
00316 }
00317 else {
00318
00319 s_buffer.erase(0, pos);
00320 message_out(DEBUG,"ofx_proc_file():<OF?> has been found");
00321 }
00322
00323 if (libofx_context->currentFileType()==OFX) {
00324 pos=s_buffer.find("</OFX>");
00325 if (pos==string::npos)
00326 pos=s_buffer.find("</ofx>");
00327 }
00328 else if (libofx_context->currentFileType()==OFC) {
00329 pos=s_buffer.find("</OFC>");
00330 if (pos==string::npos)
00331 pos=s_buffer.find("</ofc>");
00332 }
00333 else {
00334 message_out(ERROR,"ofx_proc(): unknown file type");
00335 return -1;
00336 }
00337
00338 if (pos==string::npos || pos > s_buffer.size()) {
00339 message_out(ERROR,"ofx_proc():</OF?> has not been found");
00340 return -1;
00341 }
00342 else {
00343
00344 if (s_buffer.size() > pos+6)
00345 s_buffer.erase(pos+6);
00346 message_out(DEBUG,"ofx_proc_file():<OFX> has been found");
00347 }
00348
00349 s_buffer=sanitize_proprietary_tags(s_buffer);
00350 tmp_file.write(s_buffer.c_str(), s_buffer.length());
00351
00352 tmp_file.close();
00353
00354 char filename_openspdtd[255];
00355 char filename_dtd[255];
00356 char filename_ofx[255];
00357 strncpy(filename_openspdtd,find_dtd(OPENSPDCL_FILENAME).c_str(),255);
00358 if(libofx_context->currentFileType()==OFX){
00359 strncpy(filename_dtd,find_dtd(OFX160DTD_FILENAME).c_str(),255);
00360 }
00361 else if(libofx_context->currentFileType()==OFC){
00362 strncpy(filename_dtd,find_dtd(OFCDTD_FILENAME).c_str(),255);
00363 }
00364 else {
00365 message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00366 }
00367
00368 if((string)filename_dtd!="" && (string)filename_openspdtd!=""){
00369 strncpy(filename_ofx,tmp_filename,255);
00370 filenames[0]=filename_openspdtd;
00371 filenames[1]=filename_dtd;
00372 filenames[2]=filename_ofx;
00373 if(libofx_context->currentFileType()==OFX){
00374 ofx_proc_sgml(libofx_context, 3,filenames);
00375 }
00376 else if(libofx_context->currentFileType()==OFC){
00377 ofc_proc_sgml(libofx_context, 3,filenames);
00378 }
00379 else {
00380 message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00381 }
00382 if(remove(tmp_filename)!=0){
00383 message_out(ERROR,"ofx_proc_file(): Error deleting temporary file "+string(tmp_filename));
00384 }
00385 }
00386 else {
00387 message_out(ERROR,"ofx_proc_file(): FATAL: Missing DTD, aborting");
00388 }
00389
00390 return 0;
00391 }
00392
00393
00394
00395
00396
00397
00402 string sanitize_proprietary_tags(string input_string)
00403 {
00404 unsigned int i;
00405 size_t input_string_size;
00406 bool strip=false;
00407 bool tag_open=false;
00408 int tag_open_idx=0;
00409 bool closing_tag_open=false;
00410 int orig_tag_open_idx=0;
00411 bool proprietary_tag=false;
00412 bool proprietary_closing_tag=false;
00413 int crop_end_idx=0;
00414 char buffer[READ_BUFFER_SIZE]="";
00415 char tagname[READ_BUFFER_SIZE]="";
00416 int tagname_idx=0;
00417 char close_tagname[READ_BUFFER_SIZE]="";
00418
00419 for(i=0;i<READ_BUFFER_SIZE;i++){
00420 buffer[i]=0;
00421 tagname[i]=0;
00422 close_tagname[i]=0;
00423 }
00424
00425 input_string_size=input_string.size();
00426
00427 for(i=0;i<=input_string_size;i++){
00428 if(input_string.c_str()[i]=='<'){
00429 tag_open=true;
00430 tag_open_idx=i;
00431 if(proprietary_tag==true&&input_string.c_str()[i+1]=='/'){
00432
00433 closing_tag_open=true;
00434
00435 if(strncmp(tagname,&(input_string.c_str()[i+2]),strlen(tagname))!=0){
00436
00437
00438 crop_end_idx=i-1;
00439 strip=true;
00440 }
00441 else{
00442
00443 proprietary_closing_tag=true;
00444 }
00445 }
00446 else if(proprietary_tag==true){
00447
00448 crop_end_idx=i-1;
00449 strip=true;
00450 }
00451 }
00452 else if(input_string.c_str()[i]=='>'){
00453 tag_open=false;
00454 closing_tag_open=false;
00455 tagname[tagname_idx]=0;
00456 tagname_idx=0;
00457 if(proprietary_closing_tag==true){
00458 crop_end_idx=i;
00459 strip=true;
00460 }
00461 }
00462 else if(tag_open==true&&closing_tag_open==false){
00463 if(input_string.c_str()[i]=='.'){
00464 if(proprietary_tag!=true){
00465 orig_tag_open_idx = tag_open_idx;
00466 proprietary_tag=true;
00467 }
00468 }
00469 tagname[tagname_idx]=input_string.c_str()[i];
00470 tagname_idx++;
00471 }
00472
00473 if(strip==true)
00474 {
00475 input_string.copy(buffer,(crop_end_idx-orig_tag_open_idx)+1,orig_tag_open_idx);
00476 message_out(INFO,"sanitize_proprietary_tags() (end tag or new tag) removed: "+string(buffer));
00477 input_string.erase(orig_tag_open_idx,(crop_end_idx-orig_tag_open_idx)+1);
00478 i=orig_tag_open_idx-1;
00479 proprietary_tag=false;
00480 proprietary_closing_tag=false;
00481 closing_tag_open=false;
00482 tag_open=false;
00483 strip=false;
00484 }
00485
00486 }
00487 if(proprietary_tag==true){
00488 if(crop_end_idx==0){
00489 crop_end_idx=input_string.size()-1;
00490 }
00491 input_string.copy(buffer,(crop_end_idx-orig_tag_open_idx)+1,orig_tag_open_idx);
00492 message_out(INFO,"sanitize_proprietary_tags() (end of line) removed: "+string(buffer));
00493 input_string.erase(orig_tag_open_idx,(crop_end_idx-orig_tag_open_idx)+1);
00494 }
00495 return input_string;
00496 }
00497
00498
00499
00505 string find_dtd(string dtd_filename)
00506 {
00507 int i;
00508 ifstream dtd_file;
00509 string dtd_path_filename;
00510 bool dtd_found=false;
00511
00512 for(i=0;i<DTD_SEARCH_PATH_NUM&&dtd_found==false;i++){
00513 dtd_path_filename=DTD_SEARCH_PATH[i];
00514 dtd_path_filename.append(dtd_filename);
00515 dtd_file.clear();
00516 dtd_file.open(dtd_path_filename.c_str());
00517 if(!dtd_file){
00518 message_out(DEBUG,"find_dtd():Unable to open the file "+dtd_path_filename);
00519 }
00520 else{
00521 message_out(STATUS,"find_dtd():DTD found: "+dtd_path_filename);
00522 dtd_file.close();
00523 dtd_found=true;
00524 }
00525 }
00526 if(dtd_found==false){
00527 message_out(ERROR,"find_dtd():Unable to find the DTD named " + dtd_filename);
00528 dtd_path_filename="";
00529 }
00530 return dtd_path_filename;
00531 }
00532
00533