xmlparser.cpp
Go to the documentation of this file.
00001 /*************************************************************************** 00002 file : $URL: https://frepple.svn.sourceforge.net/svnroot/frepple/trunk/src/utils/xmlparser.cpp $ 00003 version : $LastChangedRevision: 1475 $ $LastChangedBy: jdetaeye $ 00004 date : $LastChangedDate: 2011-07-09 13:41:52 +0200 (Sat, 09 Jul 2011) $ 00005 ***************************************************************************/ 00006 00007 /*************************************************************************** 00008 * * 00009 * Copyright (C) 2007-2011 by Johan De Taeye, frePPLe bvba * 00010 * * 00011 * This library is free software; you can redistribute it and/or modify it * 00012 * under the terms of the GNU Lesser General Public License as published * 00013 * by the Free Software Foundation; either version 2.1 of the License, or * 00014 * (at your option) any later version. * 00015 * * 00016 * This library is distributed in the hope that it will be useful, * 00017 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser * 00019 * General Public License for more details. * 00020 * * 00021 * You should have received a copy of the GNU Lesser General Public * 00022 * License along with this library; if not, write to the Free Software * 00023 * Foundation Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 * 00024 * USA * 00025 * * 00026 ***************************************************************************/ 00027 00028 #define FREPPLE_CORE 00029 #include "frepple/utils.h" 00030 #include <sys/stat.h> 00031 00032 /* Uncomment the next line to create a lot of debugging messages during 00033 * the parsing of XML-data. */ 00034 //#define PARSE_DEBUG 00035 00036 // With VC++ we use the Win32 functions to browse a directory 00037 #ifdef _MSC_VER 00038 #define WIN32_LEAN_AND_MEAN 00039 #include <windows.h> 00040 #else 00041 // With Unix-like systems we use a check suggested by the autoconf tools 00042 #if HAVE_DIRENT_H 00043 # include <dirent.h> 00044 # define NAMLEN(dirent) strlen((dirent)->d_name) 00045 #else 00046 # define dirent direct 00047 # define NAMLEN(dirent) (dirent)->d_namlen 00048 # if HAVE_SYS_NDIR_H 00049 # include <sys/ndir.h> 00050 # endif 00051 # if HAVE_SYS_DIR_H 00052 # include <sys/dir.h> 00053 # endif 00054 # if HAVE_NDIR_H 00055 # include <ndir.h> 00056 # endif 00057 #endif 00058 #endif 00059 00060 00061 namespace frepple 00062 { 00063 namespace utils 00064 { 00065 00066 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::STANDARD = 1; 00067 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::PLAN = 2; 00068 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::PLANDETAIL = 4; 00069 00070 00071 void XMLInput::processingInstruction 00072 (const XMLCh *const target, const XMLCh *const data) 00073 { 00074 char* type = xercesc::XMLString::transcode(target); 00075 char* value = xercesc::XMLString::transcode(data); 00076 try 00077 { 00078 if (!strcmp(type,"python")) 00079 { 00080 // "python" is the only processing instruction which we process. 00081 // Others will be silently ignored 00082 try 00083 { 00084 // Execute the processing instruction 00085 PythonInterpreter::execute(value); 00086 } 00087 catch (DataException e) 00088 { 00089 if (abortOnDataException) 00090 { 00091 xercesc::XMLString::release(&type); 00092 xercesc::XMLString::release(&value); 00093 throw; 00094 } 00095 else logger << "Continuing after data error: " << e.what() << endl; 00096 } 00097 } 00098 xercesc::XMLString::release(&type); 00099 xercesc::XMLString::release(&value); 00100 } 00101 catch (...) 00102 { 00103 xercesc::XMLString::release(&type); 00104 xercesc::XMLString::release(&value); 00105 throw; 00106 } 00107 } 00108 00109 00110 void XMLInput::startElement(const XMLCh* const uri, const XMLCh* const n, 00111 const XMLCh* const qname, const xercesc::Attributes& atts) 00112 { 00113 // Validate the state 00114 assert(!states.empty()); 00115 00116 // Check for excessive number of open objects 00117 if (numElements >= maxdepth) 00118 throw DataException("XML-document with elements nested excessively deep"); 00119 00120 // Push the element on the stack 00121 datapair *pElement = &m_EStack[numElements+1]; 00122 pElement->first.reset(n); 00123 pElement->second.reset(); 00124 00125 // Store a pointer to the attributes 00126 attributes = &atts; 00127 00128 switch (states.top()) 00129 { 00130 case SHUTDOWN: 00131 // STATE: Parser is shutting down, and we can ignore all input that 00132 // is still coming 00133 return; 00134 00135 case IGNOREINPUT: 00136 // STATE: Parser is ignoring a part of the input 00137 if (pElement->first.getHash() == endingHashes.top()) 00138 // Increase the count of occurences before the ignore section ends 00139 ++ignore; 00140 ++numElements; 00141 return; 00142 00143 case INIT: 00144 // STATE: The only time the parser comes in this state is when we read 00145 // opening tag of the ROOT tag. 00146 #ifdef PARSE_DEBUG 00147 if (!m_EHStack.empty()) 00148 logger << "Initialize root tag for reading object " 00149 << getCurrentObject() << " (" 00150 << typeid(*getCurrentObject()).name() << ")" << endl; 00151 else 00152 logger << "Initialize root tag for reading object NULL" << endl; 00153 #endif 00154 states.top() = READOBJECT; 00155 endingHashes.push(pElement->first.getHash()); 00156 // Note that there is no break or return here. We also execute the 00157 // statements of the following switch-case. 00158 00159 case READOBJECT: 00160 // STATE: Parser is reading data elements of an object 00161 // Debug 00162 #ifdef PARSE_DEBUG 00163 logger << " Start element " << pElement->first.getName() 00164 << " - object " << getCurrentObject() << endl; 00165 #endif 00166 00167 // Call the handler of the object 00168 assert(!m_EHStack.empty()); 00169 try {getCurrentObject()->beginElement(*this, pElement->first);} 00170 catch (DataException e) 00171 { 00172 if (abortOnDataException) throw; 00173 else logger << "Continuing after data error: " << e.what() << endl; 00174 } 00175 00176 // Now process all attributes. For attributes we only call the 00177 // endElement() member and skip the beginElement() method. 00178 numElements += 1; 00179 if (states.top() != IGNOREINPUT) 00180 for (unsigned int i=0, cnt=atts.getLength(); i<cnt; i++) 00181 { 00182 char* val = xercesc::XMLString::transcode(atts.getValue(i)); 00183 m_EStack[numElements+1].first.reset(atts.getLocalName(i)); 00184 m_EStack[numElements+1].second.setData(val); 00185 #ifdef PARSE_DEBUG 00186 char* attname = xercesc::XMLString::transcode(atts.getQName(i)); 00187 logger << " Processing attribute " << attname 00188 << " - object " << getCurrentObject() << endl; 00189 xercesc::XMLString::release(&attname); 00190 #endif 00191 try {getCurrentObject()->endElement(*this, m_EStack[numElements+1].first, m_EStack[numElements+1].second);} 00192 catch (DataException e) 00193 { 00194 if (abortOnDataException) throw; 00195 else logger << "Continuing after data error: " << e.what() << endl; 00196 } 00197 xercesc::XMLString::release(&val); 00198 // Stop processing attributes if we are now in the ignore mode 00199 if (states.top() == IGNOREINPUT) break; 00200 } 00201 } // End of switch statement 00202 00203 // Outside of this handler, no attributes are available 00204 attributes = NULL; 00205 } 00206 00207 00208 void XMLInput::endElement(const XMLCh* const uri, 00209 const XMLCh* const s, 00210 const XMLCh* const qname) 00211 { 00212 // Validate the state 00213 assert(numElements >= 0); 00214 assert(!states.empty()); 00215 assert(numElements < maxdepth); 00216 00217 // Remove an element from the stack 00218 datapair *pElement = &(m_EStack[numElements--]); 00219 00220 switch (states.top()) 00221 { 00222 case INIT: 00223 // This should never happen! 00224 throw LogicException("Unreachable code reached"); 00225 00226 case SHUTDOWN: 00227 // STATE: Parser is shutting down, and we can ignore all input that is 00228 // still coming 00229 return; 00230 00231 case IGNOREINPUT: 00232 // STATE: Parser is ignoring a part of the input 00233 #ifdef PARSE_DEBUG 00234 logger << " End element " << pElement->first.getName() 00235 << " - IGNOREINPUT state" << endl; 00236 #endif 00237 // Continue if we aren't dealing with the tag being ignored 00238 if (pElement->first.getHash() != endingHashes.top()) return; 00239 if (ignore == 0) 00240 { 00241 // Finished ignoring now 00242 states.pop(); 00243 endingHashes.pop(); 00244 #ifdef PARSE_DEBUG 00245 logger << "Finish IGNOREINPUT state" << endl; 00246 #endif 00247 } 00248 else 00249 --ignore; 00250 break; 00251 00252 case READOBJECT: 00253 // STATE: Parser is reading data elements of an object 00254 #ifdef PARSE_DEBUG 00255 logger << " End element " << pElement->first.getName() 00256 << " - object " << getCurrentObject() << endl; 00257 #endif 00258 00259 // Check if we finished with the current handler 00260 assert(!m_EHStack.empty()); 00261 if (pElement->first.getHash() == endingHashes.top()) 00262 { 00263 // Call the ending handler of the Object, with a special 00264 // flag to specify that this object is now ended 00265 objectEnded = true; 00266 try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);} 00267 catch (DataException e) 00268 { 00269 if (abortOnDataException) throw; 00270 else logger << "Continuing after data error: " << e.what() << endl; 00271 } 00272 objectEnded = false; 00273 #ifdef PARSE_DEBUG 00274 logger << "Finish reading object " << getCurrentObject() << endl; 00275 #endif 00276 // Pop from the handler object stack 00277 prev = getCurrentObject(); 00278 m_EHStack.pop_back(); 00279 endingHashes.pop(); 00280 00281 // Pop from the state stack 00282 states.pop(); 00283 if (m_EHStack.empty()) 00284 shutdown(); 00285 else 00286 { 00287 // Call also the endElement function on the owning object 00288 try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);} 00289 catch (DataException e) 00290 { 00291 if (abortOnDataException) throw; 00292 else logger << "Continuing after data error: " << e.what() << endl; 00293 } 00294 #ifdef PARSE_DEBUG 00295 logger << " End element " << pElement->first.getName() 00296 << " - object " << getCurrentObject() << endl; 00297 #endif 00298 } 00299 } 00300 else 00301 // This tag is not the ending tag of an object 00302 // Call the function of the Object 00303 try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);} 00304 catch (DataException e) 00305 { 00306 if (abortOnDataException) throw; 00307 else logger << "Continuing after data error: " << e.what() << endl; 00308 } 00309 } 00310 } 00311 00312 00313 // Unfortunately the prototype for this handler function differs between 00314 // Xerces-c 2.x and 3.x 00315 #if XERCES_VERSION_MAJOR==2 00316 void XMLInput::characters(const XMLCh *const c, const unsigned int n) 00317 #else 00318 void XMLInput::characters(const XMLCh *const c, const XMLSize_t n) 00319 #endif 00320 { 00321 // No data capture during the ignore state 00322 if (states.top()==IGNOREINPUT) return; 00323 00324 // Process the data 00325 char* name = xercesc::XMLString::transcode(c); 00326 m_EStack[numElements].second.addData(name, strlen(name)); 00327 xercesc::XMLString::release(&name); 00328 } 00329 00330 00331 void XMLInput::warning(const xercesc::SAXParseException& exception) 00332 { 00333 char* message = xercesc::XMLString::transcode(exception.getMessage()); 00334 logger << "Warning: " << message 00335 << " at line: " << exception.getLineNumber() << endl; 00336 xercesc::XMLString::release(&message); 00337 } 00338 00339 00340 DECLARE_EXPORT void XMLInput::readto(Object * pPI) 00341 { 00342 // Keep track of the tag where this object will end 00343 assert(numElements >= -1); 00344 endingHashes.push(m_EStack[numElements+1].first.getHash()); 00345 if (pPI) 00346 { 00347 // Push a new object on the handler stack 00348 #ifdef PARSE_DEBUG 00349 logger << "Start reading object " << pPI 00350 << " (" << typeid(*pPI).name() << ")" << endl; 00351 #endif 00352 prev = getCurrentObject(); 00353 m_EHStack.push_back(make_pair(pPI,static_cast<void*>(NULL))); 00354 states.push(READOBJECT); 00355 } 00356 else 00357 { 00358 // Ignore the complete content of this element 00359 #ifdef PARSE_DEBUG 00360 logger << "Start ignoring input" << endl; 00361 #endif 00362 states.push(IGNOREINPUT); 00363 } 00364 } 00365 00366 00367 void XMLInput::shutdown() 00368 { 00369 // Already shutting down... 00370 if (states.empty() || states.top() == SHUTDOWN) return; 00371 00372 // Message 00373 #ifdef PARSE_DEBUG 00374 logger << " Forcing a shutdown - SHUTDOWN state" << endl; 00375 #endif 00376 00377 // Change the state 00378 states.push(SHUTDOWN); 00379 00380 // Done if we have no elements on the stack, i.e. a normal end. 00381 if (numElements<0) return; 00382 00383 // Call the ending handling of all objects on the stack 00384 // This allows them to finish off in a valid state, and delete any temporary 00385 // objects they may have allocated. 00386 objectEnded = true; 00387 m_EStack[numElements].first.reset("Not a real tag"); 00388 m_EStack[numElements].second.reset(); 00389 while (!m_EHStack.empty()) 00390 { 00391 try {getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);} 00392 catch (DataException e) 00393 { 00394 if (abortOnDataException) throw; 00395 else logger << "Continuing after data error: " << e.what() << endl; 00396 } 00397 m_EHStack.pop_back(); 00398 } 00399 } 00400 00401 00402 void XMLInput::reset() 00403 { 00404 // Delete the xerces parser object 00405 delete parser; 00406 parser = NULL; 00407 00408 // Call the ending handling of all objects on the stack 00409 // This allows them to finish off in a valid state, and delete any temporary 00410 // objects they may have allocated. 00411 if (!m_EHStack.empty()) 00412 { 00413 // The next line is to avoid calling the endElement handler twice for the 00414 // last object. E.g. endElement handler causes and exception, and as part 00415 // of the exception handling we call the reset method. 00416 if (objectEnded) m_EHStack.pop_back(); 00417 objectEnded = true; 00418 m_EStack[++numElements].first.reset("Not a real tag"); 00419 m_EStack[++numElements].second.reset(); 00420 while (!m_EHStack.empty()) 00421 { 00422 try {getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);} 00423 catch (DataException e) 00424 { 00425 if (abortOnDataException) throw; 00426 else logger << "Continuing after data error: " << e.what() << endl; 00427 } 00428 m_EHStack.pop_back(); 00429 } 00430 } 00431 00432 // Cleanup of stacks 00433 while (!states.empty()) states.pop(); 00434 while (!endingHashes.empty()) endingHashes.pop(); 00435 00436 // Set all variables back to their starting values 00437 numElements = -1; 00438 ignore = 0; 00439 objectEnded = false; 00440 attributes = NULL; 00441 } 00442 00443 00444 void XMLInput::parse(xercesc::InputSource &in, Object *pRoot, bool validate) 00445 { 00446 try 00447 { 00448 // Create a Xerces parser 00449 parser = xercesc::XMLReaderFactory::createXMLReader(); 00450 00451 // Set the features of the parser. A bunch of the options are dependent 00452 // on whether we want to validate the input or not. 00453 parser->setProperty(xercesc::XMLUni::fgXercesScannerName, const_cast<XMLCh*> 00454 (validate ? xercesc::XMLUni::fgSGXMLScanner : xercesc::XMLUni::fgWFXMLScanner)); 00455 parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, validate); 00456 parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpacePrefixes, false); 00457 parser->setFeature(xercesc::XMLUni::fgXercesIdentityConstraintChecking, false); 00458 parser->setFeature(xercesc::XMLUni::fgXercesDynamic, false); 00459 parser->setFeature(xercesc::XMLUni::fgXercesSchema, validate); 00460 parser->setFeature(xercesc::XMLUni::fgXercesSchemaFullChecking, false); 00461 parser->setFeature(xercesc::XMLUni::fgXercesValidationErrorAsFatal,true); 00462 parser->setFeature(xercesc::XMLUni::fgXercesIgnoreAnnotations,true); 00463 00464 if (validate) 00465 { 00466 // Specify the no-namespace schema file 00467 string schema = Environment::searchFile("frepple.xsd"); 00468 if (schema.empty()) 00469 throw RuntimeException("Can't find XML schema file 'frepple.xsd'"); 00470 XMLCh *c = xercesc::XMLString::transcode(schema.c_str()); 00471 parser->setProperty( 00472 xercesc::XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation, c 00473 ); 00474 xercesc::XMLString::release(&c); 00475 } 00476 00477 // If we are reading into a NULL object, there is no need to use a 00478 // content handler or a handler stack. 00479 if (pRoot) 00480 { 00481 // Set the event handler. If we are reading into a NULL object, there is 00482 // no need to use a content handler. 00483 parser->setContentHandler(this); 00484 00485 // Get the parser to read data into the object pRoot. 00486 m_EHStack.push_back(make_pair(pRoot,static_cast<void*>(NULL))); 00487 states.push(INIT); 00488 } 00489 00490 // Set the error handler 00491 parser->setErrorHandler(this); 00492 00493 // Parse the input 00494 parser->parse(in); 00495 } 00496 // Note: the reset() method needs to be called in all circumstances. The 00497 // reset method allows all objects to finish in a valid state and clean up 00498 // any memory they may have allocated. 00499 catch (const xercesc::XMLException& toCatch) 00500 { 00501 char* message = xercesc::XMLString::transcode(toCatch.getMessage()); 00502 string msg(message); 00503 xercesc::XMLString::release(&message); 00504 reset(); 00505 throw RuntimeException("Parsing error: " + msg); 00506 } 00507 catch (const xercesc::SAXParseException& toCatch) 00508 { 00509 char* message = xercesc::XMLString::transcode(toCatch.getMessage()); 00510 ostringstream msg; 00511 if (toCatch.getLineNumber() > 0) 00512 msg << "Parsing error: " << message << " at line " << toCatch.getLineNumber(); 00513 else 00514 msg << "Parsing error: " << message; 00515 xercesc::XMLString::release(&message); 00516 reset(); 00517 throw RuntimeException(msg.str()); 00518 } 00519 catch (const exception& toCatch) 00520 { 00521 reset(); 00522 ostringstream msg; 00523 msg << "Error during XML parsing: " << toCatch.what(); 00524 throw RuntimeException(msg.str()); 00525 } 00526 catch (...) 00527 { 00528 reset(); 00529 throw RuntimeException( 00530 "Parsing error: Unexpected exception during XML parsing"); 00531 } 00532 reset(); 00533 } 00534 00535 00536 DECLARE_EXPORT ostream& operator << (ostream& os, const XMLEscape& x) 00537 { 00538 for (const char* p = x.data; *p; ++p) 00539 { 00540 switch (*p) 00541 { 00542 case '&': os << "&"; break; 00543 case '<': os << "<"; break; 00544 case '>': os << ">"; break; 00545 case '"': os << """; break; 00546 case '\'': os << "'"; break; 00547 default: os << *p; 00548 } 00549 } 00550 return os; 00551 } 00552 00553 00554 DECLARE_EXPORT void XMLOutput::incIndent() 00555 { 00556 indentstring[m_nIndent++] = '\t'; 00557 if (m_nIndent > 40) m_nIndent = 40; 00558 indentstring[m_nIndent] = '\0'; 00559 } 00560 00561 00562 DECLARE_EXPORT void XMLOutput::decIndent() 00563 { 00564 if (--m_nIndent < 0) m_nIndent = 0; 00565 indentstring[m_nIndent] = '\0'; 00566 } 00567 00568 00569 DECLARE_EXPORT void XMLOutput::writeElement 00570 (const Keyword& tag, const Object* object, mode m) 00571 { 00572 // Avoid NULL pointers and skip hidden objects 00573 if (!object || object->getHidden()) return; 00574 00575 // Adjust current and parent object pointer 00576 const Object *previousParent = parentObject; 00577 parentObject = currentObject; 00578 currentObject = object; 00579 ++numObjects; 00580 ++numParents; 00581 00582 // Call the write method on the object 00583 if (m != DEFAULT) 00584 // Mode is overwritten 00585 object->writeElement(this, tag, m); 00586 else 00587 // Choose wether to save a reference of the object. 00588 // The root object can't be saved as a reference. 00589 object->writeElement(this, tag, numParents>2 ? REFERENCE : DEFAULT); 00590 00591 // Adjust current and parent object pointer 00592 --numParents; 00593 currentObject = parentObject; 00594 parentObject = previousParent; 00595 } 00596 00597 00598 DECLARE_EXPORT void XMLOutput::writeElementWithHeader(const Keyword& tag, const Object* object) 00599 { 00600 // Root object can't be null... 00601 if (!object) 00602 throw RuntimeException("Can't accept a NULL object as XML root"); 00603 00604 // There should not be any saved objects yet 00605 if (numObjects > 0) 00606 throw LogicException("Can't have multiple headers in a document"); 00607 assert(!parentObject); 00608 assert(!currentObject); 00609 00610 // Write the first line for the xml document 00611 writeString(getHeaderStart()); 00612 00613 // Adjust current object pointer 00614 currentObject = object; 00615 00616 // Write the object 00617 ++numObjects; 00618 ++numParents; 00619 BeginObject(tag, getHeaderAtts()); 00620 object->writeElement(this, tag, NOHEADER); 00621 00622 // Adjust current and parent object pointer 00623 currentObject = NULL; 00624 parentObject = NULL; 00625 } 00626 00627 00628 DECLARE_EXPORT void XMLOutput::writeHeader(const Keyword& tag) 00629 { 00630 // There should not be any saved objects yet 00631 if (numObjects > 0 || !parentObject || !currentObject) 00632 throw LogicException("Writing invalid header to XML document"); 00633 00634 // Write the first line and the opening tag 00635 writeString(getHeaderStart()); 00636 BeginObject(tag, getHeaderAtts()); 00637 00638 // Fake a dummy parent 00639 numParents += 2; 00640 } 00641 00642 00643 DECLARE_EXPORT bool XMLElement::getBool() const 00644 { 00645 switch (getData()[0]) 00646 { 00647 case 'T': 00648 case 't': 00649 case '1': 00650 return true; 00651 case 'F': 00652 case 'f': 00653 case '0': 00654 return false; 00655 } 00656 throw DataException("Invalid boolean value: " + string(getData())); 00657 } 00658 00659 00660 DECLARE_EXPORT const char* Attribute::getName() const 00661 { 00662 if (ch) return ch; 00663 Keyword::tagtable::const_iterator i = Keyword::getTags().find(hash); 00664 if (i == Keyword::getTags().end()) 00665 throw LogicException("Undefined element keyword"); 00666 return i->second->getName().c_str(); 00667 } 00668 00669 00670 DECLARE_EXPORT Keyword::Keyword(const string& name) : strName(name) 00671 { 00672 // Error condition: name is empty 00673 if (name.empty()) throw LogicException("Creating keyword without name"); 00674 00675 // Create a number of variations of the tag name 00676 strStartElement = string("<") + name; 00677 strEndElement = string("</") + name + ">\n"; 00678 strElement = string("<") + name + ">"; 00679 strAttribute = string(" ") + name + "=\""; 00680 00681 // Compute the hash value 00682 dw = hash(name.c_str()); 00683 00684 // Create a properly encoded Xerces string 00685 xercesc::XMLPlatformUtils::Initialize(); 00686 xmlname = xercesc::XMLString::transcode(name.c_str()); 00687 00688 // Verify that the hash is "perfect". 00689 check(); 00690 } 00691 00692 00693 DECLARE_EXPORT Keyword::Keyword(const string& name, const string& nspace) 00694 : strName(name) 00695 { 00696 // Error condition: name is empty 00697 if (name.empty()) 00698 throw LogicException("Creating keyword without name"); 00699 if (nspace.empty()) 00700 throw LogicException("Creating keyword with empty namespace"); 00701 00702 // Create a number of variations of the tag name 00703 strStartElement = string("<") + nspace + ":" + name; 00704 strEndElement = string("</") + nspace + ":" + name + ">\n"; 00705 strElement = string("<") + nspace + ":" + name + ">"; 00706 strAttribute = string(" ") + nspace + ":" + name + "=\""; 00707 00708 // Compute the hash value 00709 dw = hash(name); 00710 00711 // Create a properly encoded Xerces string 00712 xercesc::XMLPlatformUtils::Initialize(); 00713 xmlname = xercesc::XMLString::transcode(string(nspace + ":" + name).c_str()); 00714 00715 // Verify that the hash is "perfect". 00716 check(); 00717 } 00718 00719 00720 void Keyword::check() 00721 { 00722 // To be thread-safe we make sure only a single thread at a time 00723 // can execute this check. 00724 static Mutex dd; 00725 { 00726 ScopeMutexLock l(dd); 00727 tagtable::const_iterator i = getTags().find(dw); 00728 if (i!=getTags().end() && i->second->getName()!=strName) 00729 throw LogicException("Tag XML-tag hash function clashes for " 00730 + i->second->getName() + " and " + strName); 00731 getTags().insert(make_pair(dw,this)); 00732 } 00733 } 00734 00735 00736 DECLARE_EXPORT Keyword::~Keyword() 00737 { 00738 // Remove from the tag list 00739 tagtable::iterator i = getTags().find(dw); 00740 if (i!=getTags().end()) getTags().erase(i); 00741 00742 // Destroy the xerces string 00743 xercesc::XMLString::release(&xmlname); 00744 xercesc::XMLPlatformUtils::Terminate(); 00745 } 00746 00747 00748 DECLARE_EXPORT const Keyword& Keyword::find(const char* name) 00749 { 00750 tagtable::const_iterator i = getTags().find(hash(name)); 00751 return *(i!=getTags().end() ? i->second : new Keyword(name)); 00752 } 00753 00754 00755 DECLARE_EXPORT Keyword::tagtable& Keyword::getTags() 00756 { 00757 static tagtable alltags; 00758 return alltags; 00759 } 00760 00761 00762 DECLARE_EXPORT hashtype Keyword::hash(const char* c) 00763 { 00764 if (c == 0 || *c == 0) return 0; 00765 00766 // Compute hash 00767 const char* curCh = c; 00768 hashtype hashVal = *curCh++; 00769 while (*curCh) 00770 hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++; 00771 00772 // Divide by modulus 00773 return hashVal % 954991; 00774 } 00775 00776 00777 DECLARE_EXPORT hashtype Keyword::hash(const XMLCh* t) 00778 { 00779 char* c = xercesc::XMLString::transcode(t); 00780 if (c == 0 || *c == 0) 00781 { 00782 xercesc::XMLString::release(&c); 00783 return 0; 00784 } 00785 00786 // Compute hash 00787 const char* curCh = c; 00788 hashtype hashVal = *curCh++; 00789 while (*curCh) 00790 hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++; 00791 00792 // Divide by modulus 00793 xercesc::XMLString::release(&c); 00794 return hashVal % 954991; 00795 } 00796 00797 00798 DECLARE_EXPORT void Keyword::printTags() 00799 { 00800 for (tagtable::iterator i = getTags().begin(); i != getTags().end(); ++i) 00801 logger << i->second->getName() << " " << i->second->dw << endl; 00802 } 00803 00804 00805 void XMLInputFile::parse(Object *pRoot, bool validate) 00806 { 00807 // Check if string has been set 00808 if (filename.empty()) 00809 throw DataException("Missing input file or directory"); 00810 00811 // Check if the parameter is the name of a directory 00812 struct stat stat_p; 00813 if (stat(filename.c_str(), &stat_p)) 00814 // Can't verify the status 00815 throw RuntimeException("Couldn't open input file '" + filename + "'"); 00816 else if (stat_p.st_mode & S_IFDIR) 00817 { 00818 // Data is a directory: loop through all *.xml files now. No recursion in 00819 // subdirectories is done. 00820 // The code is unfortunately different for Windows & Linux. Sigh... 00821 #ifdef _MSC_VER 00822 string f = filename + "\\*.xml"; 00823 WIN32_FIND_DATA dir_entry_p; 00824 HANDLE h = FindFirstFile(f.c_str(), &dir_entry_p); 00825 if (h == INVALID_HANDLE_VALUE) 00826 throw RuntimeException("Couldn't open input file '" + f + "'"); 00827 do 00828 { 00829 f = filename + '/' + dir_entry_p.cFileName; 00830 XMLInputFile(f.c_str()).parse(pRoot); 00831 } 00832 while (FindNextFile(h, &dir_entry_p)); 00833 FindClose(h); 00834 #elif HAVE_DIRENT_H 00835 struct dirent *dir_entry_p; 00836 DIR *dir_p = opendir(filename.c_str()); 00837 while (NULL != (dir_entry_p = readdir(dir_p))) 00838 { 00839 int n = NAMLEN(dir_entry_p); 00840 if (n > 4 && !strcmp(".xml", dir_entry_p->d_name + n - 4)) 00841 { 00842 string f = filename + '/' + dir_entry_p->d_name; 00843 XMLInputFile(f.c_str()).parse(pRoot, validate); 00844 } 00845 } 00846 closedir(dir_p); 00847 #else 00848 throw RuntimeException("Can't process a directory on your platform"); 00849 #endif 00850 } 00851 else 00852 { 00853 // Normal file 00854 // Parse the file 00855 XMLCh *f = xercesc::XMLString::transcode(filename.c_str()); 00856 xercesc::LocalFileInputSource in(f); 00857 xercesc::XMLString::release(&f); 00858 XMLInput::parse(in, pRoot, validate); 00859 } 00860 } 00861 00862 } // end namespace 00863 } // end namespace