xmlparser.cpp
Go to the documentation of this file.
00001 /***************************************************************************
00002   file : $URL: https://frepple.svn.sourceforge.net/svnroot/frepple/tags/0.9.1/src/utils/xmlparser.cpp $
00003   version : $LastChangedRevision: 1656 $  $LastChangedBy: jdetaeye $
00004   date : $LastChangedDate: 2012-03-27 19:05:34 +0200 (Tue, 27 Mar 2012) $
00005  ***************************************************************************/
00006 
00007 /***************************************************************************
00008  *                                                                         *
00009  * Copyright (C) 2007-2012 by Johan De Taeye, frePPLe bvba                 *
00010  *                                                                         *
00011  * This library is free software; you can redistribute it and/or modify it *
00012  * under the terms of the GNU Lesser General Public License as published   *
00013  * by the Free Software Foundation; either version 2.1 of the License, or  *
00014  * (at your option) any later version.                                     *
00015  *                                                                         *
00016  * This library is distributed in the hope that it will be useful,         *
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of          *
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser *
00019  * General Public License for more details.                                *
00020  *                                                                         *
00021  * You should have received a copy of the GNU Lesser General Public        *
00022  * License along with this library; if not, write to the Free Software     *
00023  * Foundation Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 *
00024  * USA                                                                     *
00025  *                                                                         *
00026  ***************************************************************************/
00027 
00028 #define FREPPLE_CORE
00029 #include "frepple/utils.h"
00030 #include <sys/stat.h>
00031 
00032 /* Uncomment the next line to create a lot of debugging messages during
00033  * the parsing of XML-data. */
00034 //#define PARSE_DEBUG
00035 
00036 // With VC++ we use the Win32 functions to browse a directory
00037 #ifdef _MSC_VER
00038 #define WIN32_LEAN_AND_MEAN
00039 #include <windows.h>
00040 #else
00041 // With Unix-like systems we use a check suggested by the autoconf tools
00042 #if HAVE_DIRENT_H
00043 # include <dirent.h>
00044 # define NAMLEN(dirent) strlen((dirent)->d_name)
00045 #else
00046 # define dirent direct
00047 # define NAMLEN(dirent) (dirent)->d_namlen
00048 # if HAVE_SYS_NDIR_H
00049 #  include <sys/ndir.h>
00050 # endif
00051 # if HAVE_SYS_DIR_H
00052 #  include <sys/dir.h>
00053 # endif
00054 # if HAVE_NDIR_H
00055 #  include <ndir.h>
00056 # endif
00057 #endif
00058 #endif
00059 
00060 
00061 namespace frepple
00062 {
00063 namespace utils
00064 {
00065 
00066 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::STANDARD = 1;
00067 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::PLAN = 2;
00068 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::PLANDETAIL = 4;
00069 
00070 
00071 void  XMLInput::processingInstruction
00072 (const XMLCh *const target, const XMLCh *const data)
00073 {
00074   char* type = xercesc::XMLString::transcode(target);
00075   char* value = xercesc::XMLString::transcode(data);
00076   try
00077   {
00078     if (!strcmp(type,"python"))
00079     {
00080       // "python" is the only processing instruction which we process.
00081       // Others will be silently ignored
00082       try
00083       {
00084         // Execute the processing instruction
00085         PythonInterpreter::execute(value);
00086       }
00087       catch (const DataException& e)
00088       {
00089         if (abortOnDataException)
00090         {
00091           xercesc::XMLString::release(&type);
00092           xercesc::XMLString::release(&value);
00093           throw;
00094         }
00095         else logger << "Continuing after data error: " << e.what() << endl;
00096       }
00097     }
00098     xercesc::XMLString::release(&type);
00099     xercesc::XMLString::release(&value);
00100   }
00101   catch (...)
00102   {
00103     xercesc::XMLString::release(&type);
00104     xercesc::XMLString::release(&value);
00105     throw;
00106   }
00107 }
00108 
00109 
00110 void XMLInput::startElement(const XMLCh* const uri, const XMLCh* const n,
00111     const XMLCh* const qname, const xercesc::Attributes& atts)
00112 {
00113   // Validate the state
00114   assert(!states.empty());
00115 
00116   // Check for excessive number of open objects
00117   if (numElements >= maxdepth)
00118     throw DataException("XML-document with elements nested excessively deep");
00119 
00120   // Push the element on the stack
00121   datapair *pElement = &m_EStack[numElements+1];
00122   pElement->first.reset(n);
00123   pElement->second.reset();
00124 
00125   // Store a pointer to the attributes
00126   attributes = &atts;
00127 
00128   switch (states.top())
00129   {
00130     case SHUTDOWN:
00131       // STATE: Parser is shutting down, and we can ignore all input that
00132       // is still coming
00133       return;
00134 
00135     case IGNOREINPUT:
00136       // STATE: Parser is ignoring a part of the input
00137       if (pElement->first.getHash() == endingHashes.top())
00138         // Increase the count of occurences before the ignore section ends
00139         ++ignore;
00140       ++numElements;
00141       return;
00142 
00143     case INIT:
00144       // STATE: The only time the parser comes in this state is when we read
00145       // opening tag of the ROOT tag.
00146 #ifdef PARSE_DEBUG
00147       if (!m_EHStack.empty())
00148         logger << "Initialize root tag for reading object "
00149             << getCurrentObject() << " ("
00150             << typeid(*getCurrentObject()).name() << ")" << endl;
00151       else
00152         logger << "Initialize root tag for reading object NULL" << endl;
00153 #endif
00154       states.top() = READOBJECT;
00155       endingHashes.push(pElement->first.getHash());
00156       // Note that there is no break or return here. We also execute the
00157       // statements of the following switch-case.
00158 
00159     case READOBJECT:
00160       // STATE: Parser is reading data elements of an object
00161       // Debug
00162 #ifdef PARSE_DEBUG
00163       logger << "   Start element " << pElement->first.getName()
00164           << " - object " << getCurrentObject() << endl;
00165 #endif
00166 
00167       // Call the handler of the object
00168       assert(!m_EHStack.empty());
00169       try {getCurrentObject()->beginElement(*this, pElement->first);}
00170       catch (const DataException& e)
00171       {
00172         if (abortOnDataException) throw;
00173         else logger << "Continuing after data error: " << e.what() << endl;
00174       }
00175 
00176       // Now process all attributes. For attributes we only call the
00177       // endElement() member and skip the beginElement() method.
00178       numElements += 1;
00179       if (states.top() != IGNOREINPUT)
00180         for (unsigned int i=0, cnt=atts.getLength(); i<cnt; i++)
00181         {
00182           char* val = xercesc::XMLString::transcode(atts.getValue(i));
00183           m_EStack[numElements+1].first.reset(atts.getLocalName(i));
00184           m_EStack[numElements+1].second.setData(val);
00185 #ifdef PARSE_DEBUG
00186           char* attname = xercesc::XMLString::transcode(atts.getQName(i));
00187           logger << "   Processing attribute " << attname
00188               << " - object " << getCurrentObject() << endl;
00189           xercesc::XMLString::release(&attname);
00190 #endif
00191           try {getCurrentObject()->endElement(*this, m_EStack[numElements+1].first, m_EStack[numElements+1].second);}
00192           catch (const DataException& e)
00193           {
00194             if (abortOnDataException) throw;
00195             else logger << "Continuing after data error: " << e.what() << endl;
00196           }
00197           xercesc::XMLString::release(&val);
00198           // Stop processing attributes if we are now in the ignore mode
00199           if (states.top() == IGNOREINPUT) break;
00200         }
00201   }  // End of switch statement
00202 
00203   // Outside of this handler, no attributes are available
00204   attributes = NULL;
00205 }
00206 
00207 
00208 void XMLInput::endElement(const XMLCh* const uri,
00209     const XMLCh* const s,
00210     const XMLCh* const qname)
00211 {
00212   // Validate the state
00213   assert(numElements >= 0);
00214   assert(!states.empty());
00215   assert(numElements < maxdepth);
00216 
00217   // Remove an element from the stack
00218   datapair *pElement = &(m_EStack[numElements--]);
00219 
00220   switch (states.top())
00221   {
00222     case INIT:
00223       // This should never happen!
00224       throw LogicException("Unreachable code reached");
00225 
00226     case SHUTDOWN:
00227       // STATE: Parser is shutting down, and we can ignore all input that is
00228       // still coming
00229       return;
00230 
00231     case IGNOREINPUT:
00232       // STATE: Parser is ignoring a part of the input
00233 #ifdef PARSE_DEBUG
00234       logger << "   End element " << pElement->first.getName()
00235           << " - IGNOREINPUT state" << endl;
00236 #endif
00237       // Continue if we aren't dealing with the tag being ignored
00238       if (pElement->first.getHash() != endingHashes.top()) return;
00239       if (ignore == 0)
00240       {
00241         // Finished ignoring now
00242         states.pop();
00243         endingHashes.pop();
00244 #ifdef PARSE_DEBUG
00245         logger << "Finish IGNOREINPUT state" << endl;
00246 #endif
00247       }
00248       else
00249         --ignore;
00250       break;
00251 
00252     case READOBJECT:
00253       // STATE: Parser is reading data elements of an object
00254 #ifdef PARSE_DEBUG
00255       logger << "   End element " << pElement->first.getName()
00256           << " - object " << getCurrentObject() << endl;
00257 #endif
00258 
00259       // Check if we finished with the current handler
00260       assert(!m_EHStack.empty());
00261       if (pElement->first.getHash() == endingHashes.top())
00262       {
00263         // Call the ending handler of the Object, with a special
00264         // flag to specify that this object is now ended
00265         objectEnded = true;
00266         try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
00267         catch (const DataException& e)
00268         {
00269           if (abortOnDataException) throw;
00270           else logger << "Continuing after data error: " << e.what() << endl;
00271         }
00272         objectEnded = false;
00273 #ifdef PARSE_DEBUG
00274         logger << "Finish reading object " << getCurrentObject() << endl;
00275 #endif
00276         // Pop from the handler object stack
00277         prev = getCurrentObject();
00278         m_EHStack.pop_back();
00279         endingHashes.pop();
00280 
00281         // Pop from the state stack
00282         states.pop();
00283         if (m_EHStack.empty())
00284           shutdown();
00285         else
00286         {
00287           // Call also the endElement function on the owning object
00288           try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
00289           catch (const DataException& e)
00290           {
00291             if (abortOnDataException) throw;
00292             else logger << "Continuing after data error: " << e.what() << endl;
00293           }
00294 #ifdef PARSE_DEBUG
00295           logger << "   End element " << pElement->first.getName()
00296               << " - object " << getCurrentObject() << endl;
00297 #endif
00298         }
00299       }
00300       else
00301         // This tag is not the ending tag of an object
00302         // Call the function of the Object
00303         try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
00304         catch (const DataException& e)
00305         {
00306           if (abortOnDataException) throw;
00307           else logger << "Continuing after data error: " << e.what() << endl;
00308         }
00309   }
00310 }
00311 
00312 
00313 // Unfortunately the prototype for this handler function differs between
00314 // Xerces-c 2.x and 3.x
00315 #if XERCES_VERSION_MAJOR==2
00316 void XMLInput::characters(const XMLCh *const c, const unsigned int n)
00317 #else
00318 void XMLInput::characters(const XMLCh *const c, const XMLSize_t n)
00319 #endif
00320 {
00321   // No data capture during the ignore state
00322   if (states.top()==IGNOREINPUT) return;
00323 
00324   // Process the data
00325   char* name = xercesc::XMLString::transcode(c);
00326   m_EStack[numElements].second.addData(name, strlen(name));
00327   xercesc::XMLString::release(&name);
00328 }
00329 
00330 
00331 void XMLInput::warning(const xercesc::SAXParseException& exception)
00332 {
00333   char* message = xercesc::XMLString::transcode(exception.getMessage());
00334   logger << "Warning: " << message
00335       << " at line: " << exception.getLineNumber() << endl;
00336   xercesc::XMLString::release(&message);
00337 }
00338 
00339 
00340 DECLARE_EXPORT void XMLInput::readto(Object * pPI)
00341 {
00342   // Keep track of the tag where this object will end
00343   assert(numElements >= -1);
00344   endingHashes.push(m_EStack[numElements+1].first.getHash());
00345   if (pPI)
00346   {
00347     // Push a new object on the handler stack
00348 #ifdef PARSE_DEBUG
00349     logger << "Start reading object " << pPI
00350         << " (" << typeid(*pPI).name() << ")" << endl;
00351 #endif
00352     prev = getCurrentObject();
00353     m_EHStack.push_back(make_pair(pPI,static_cast<void*>(NULL)));
00354     states.push(READOBJECT);
00355   }
00356   else
00357   {
00358     // Ignore the complete content of this element
00359 #ifdef PARSE_DEBUG
00360     logger << "Start ignoring input" << endl;
00361 #endif
00362     states.push(IGNOREINPUT);
00363   }
00364 }
00365 
00366 
00367 void XMLInput::shutdown()
00368 {
00369   // Already shutting down...
00370   if (states.empty() || states.top() == SHUTDOWN) return;
00371 
00372   // Message
00373 #ifdef PARSE_DEBUG
00374   logger << "   Forcing a shutdown - SHUTDOWN state" << endl;
00375 #endif
00376 
00377   // Change the state
00378   states.push(SHUTDOWN);
00379 
00380   // Done if we have no elements on the stack, i.e. a normal end.
00381   if (numElements<0) return;
00382 
00383   // Call the ending handling of all objects on the stack
00384   // This allows them to finish off in a valid state, and delete any temporary
00385   // objects they may have allocated.
00386   objectEnded = true;
00387   m_EStack[numElements].first.reset("Not a real tag");
00388   m_EStack[numElements].second.reset();
00389   while (!m_EHStack.empty())
00390   {
00391     try {getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);}
00392     catch (const DataException& e)
00393     {
00394       if (abortOnDataException) throw;
00395       else logger << "Continuing after data error: " << e.what() << endl;
00396     }
00397     m_EHStack.pop_back();
00398   }
00399 }
00400 
00401 
00402 void XMLInput::reset()
00403 {
00404   // Delete the xerces parser object
00405   delete parser;
00406   parser = NULL;
00407 
00408   // Call the ending handling of all objects on the stack
00409   // This allows them to finish off in a valid state, and delete any temporary
00410   // objects they may have allocated.
00411   if (!m_EHStack.empty())
00412   {
00413     // The next line is to avoid calling the endElement handler twice for the
00414     // last object. E.g. endElement handler causes and exception, and as part
00415     // of the exception handling we call the reset method.
00416     if (objectEnded) m_EHStack.pop_back();
00417     objectEnded = true;
00418     m_EStack[++numElements].first.reset("Not a real tag");
00419     m_EStack[++numElements].second.reset();
00420     while (!m_EHStack.empty())
00421     {
00422       try {getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);}
00423       catch (const DataException& e)
00424       {
00425         if (abortOnDataException) throw;
00426         else logger << "Continuing after data error: " << e.what() << endl;
00427       }
00428       m_EHStack.pop_back();
00429     }
00430   }
00431 
00432   // Cleanup of stacks
00433   while (!states.empty()) states.pop();
00434   while (!endingHashes.empty()) endingHashes.pop();
00435 
00436   // Set all variables back to their starting values
00437   numElements = -1;
00438   ignore = 0;
00439   objectEnded = false;
00440   attributes = NULL;
00441 }
00442 
00443 
00444 void XMLInput::parse(xercesc::InputSource &in, Object *pRoot, bool validate)
00445 {
00446   try
00447   {
00448     // Create a Xerces parser
00449     parser = xercesc::XMLReaderFactory::createXMLReader();
00450 
00451     // Set the features of the parser. A bunch of the options are dependent
00452     // on whether we want to validate the input or not.
00453     parser->setProperty(xercesc::XMLUni::fgXercesScannerName, const_cast<XMLCh*>
00454         (validate ? xercesc::XMLUni::fgSGXMLScanner : xercesc::XMLUni::fgWFXMLScanner));
00455     parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, validate);
00456     parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpacePrefixes, false);
00457     parser->setFeature(xercesc::XMLUni::fgXercesIdentityConstraintChecking, false);
00458     parser->setFeature(xercesc::XMLUni::fgXercesDynamic, false);
00459     parser->setFeature(xercesc::XMLUni::fgXercesSchema, validate);
00460     parser->setFeature(xercesc::XMLUni::fgXercesSchemaFullChecking, false);
00461     parser->setFeature(xercesc::XMLUni::fgXercesValidationErrorAsFatal,true);
00462     parser->setFeature(xercesc::XMLUni::fgXercesIgnoreAnnotations,true);
00463 
00464     if (validate)
00465     {
00466       // Specify the no-namespace schema file
00467       string schema = Environment::searchFile("frepple.xsd");
00468       if (schema.empty())
00469         throw RuntimeException("Can't find XML schema file 'frepple.xsd'");
00470       XMLCh *c = xercesc::XMLString::transcode(schema.c_str());
00471       parser->setProperty(
00472         xercesc::XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation, c
00473       );
00474       xercesc::XMLString::release(&c);
00475     }
00476 
00477     // If we are reading into a NULL object, there is no need to use a
00478     // content handler or a handler stack.
00479     if (pRoot)
00480     {
00481       // Set the event handler. If we are reading into a NULL object, there is
00482       // no need to use a content handler.
00483       parser->setContentHandler(this);
00484 
00485       // Get the parser to read data into the object pRoot.
00486       m_EHStack.push_back(make_pair(pRoot,static_cast<void*>(NULL)));
00487       states.push(INIT);
00488     }
00489 
00490     // Set the error handler
00491     parser->setErrorHandler(this);
00492 
00493     // Parse the input
00494     parser->parse(in);
00495   }
00496   // Note: the reset() method needs to be called in all circumstances. The
00497   // reset method allows all objects to finish in a valid state and clean up
00498   // any memory they may have allocated.
00499   catch (const xercesc::XMLException& toCatch)
00500   {
00501     char* message = xercesc::XMLString::transcode(toCatch.getMessage());
00502     string msg(message);
00503     xercesc::XMLString::release(&message);
00504     reset();
00505     throw RuntimeException("Parsing error: " + msg);
00506   }
00507   catch (const xercesc::SAXParseException& toCatch)
00508   {
00509     char* message = xercesc::XMLString::transcode(toCatch.getMessage());
00510     ostringstream msg;
00511     if (toCatch.getLineNumber() > 0)
00512       msg << "Parsing error: " << message << " at line " << toCatch.getLineNumber();
00513     else
00514       msg << "Parsing error: " << message;
00515     xercesc::XMLString::release(&message);
00516     reset();
00517     throw RuntimeException(msg.str());
00518   }
00519   catch (const exception& toCatch)
00520   {
00521     reset();
00522     ostringstream msg;
00523     msg << "Error during XML parsing: " << toCatch.what();
00524     throw RuntimeException(msg.str());
00525   }
00526   catch (...)
00527   {
00528     reset();
00529     throw RuntimeException(
00530       "Parsing error: Unexpected exception during XML parsing");
00531   }
00532   reset();
00533 }
00534 
00535 
00536 DECLARE_EXPORT ostream& operator << (ostream& os, const XMLEscape& x)
00537 {
00538   for (const char* p = x.data; *p; ++p)
00539   {
00540     switch (*p)
00541     {
00542       case '&': os << "&amp;"; break;
00543       case '<': os << "&lt;"; break;
00544       case '>': os << "&gt;"; break;
00545       case '"': os << "&quot;"; break;
00546       case '\'': os << "&apos;"; break;
00547       default: os << *p;
00548     }
00549   }
00550   return os;
00551 }
00552 
00553 
00554 DECLARE_EXPORT void XMLOutput::incIndent()
00555 {
00556   indentstring[m_nIndent++] = '\t';
00557   if (m_nIndent > 40) m_nIndent = 40;
00558   indentstring[m_nIndent] = '\0';
00559 }
00560 
00561 
00562 DECLARE_EXPORT void XMLOutput::decIndent()
00563 {
00564   if (--m_nIndent < 0) m_nIndent = 0;
00565   indentstring[m_nIndent] = '\0';
00566 }
00567 
00568 
00569 DECLARE_EXPORT void XMLOutput::writeElement
00570 (const Keyword& tag, const Object* object, mode m)
00571 {
00572   // Avoid NULL pointers and skip hidden objects
00573   if (!object || object->getHidden()) return;
00574 
00575   // Adjust current and parent object pointer
00576   const Object *previousParent = parentObject;
00577   parentObject = currentObject;
00578   currentObject = object;
00579   ++numObjects;
00580   ++numParents;
00581 
00582   // Call the write method on the object
00583   if (m != DEFAULT)
00584     // Mode is overwritten
00585     object->writeElement(this, tag, m);
00586   else
00587     // Choose wether to save a reference of the object.
00588     // The root object can't be saved as a reference.
00589     object->writeElement(this, tag, numParents>2 ? REFERENCE : DEFAULT);
00590 
00591   // Adjust current and parent object pointer
00592   --numParents;
00593   currentObject = parentObject;
00594   parentObject = previousParent;
00595 }
00596 
00597 
00598 DECLARE_EXPORT void XMLOutput::writeElementWithHeader(const Keyword& tag, const Object* object)
00599 {
00600   // Root object can't be null...
00601   if (!object)
00602     throw RuntimeException("Can't accept a NULL object as XML root");
00603 
00604   // There should not be any saved objects yet
00605   if (numObjects > 0)
00606     throw LogicException("Can't have multiple headers in a document");
00607   assert(!parentObject);
00608   assert(!currentObject);
00609 
00610   // Write the first line for the xml document
00611   writeString(getHeaderStart());
00612 
00613   // Adjust current object pointer
00614   currentObject = object;
00615 
00616   // Write the object
00617   ++numObjects;
00618   ++numParents;
00619   BeginObject(tag, getHeaderAtts());
00620   object->writeElement(this, tag, NOHEADER);
00621 
00622   // Adjust current and parent object pointer
00623   currentObject = NULL;
00624   parentObject = NULL;
00625 }
00626 
00627 
00628 DECLARE_EXPORT void XMLOutput::writeHeader(const Keyword& tag)
00629 {
00630   // There should not be any saved objects yet
00631   if (numObjects > 0 || !parentObject || !currentObject)
00632     throw LogicException("Writing invalid header to XML document");
00633 
00634   // Write the first line and the opening tag
00635   writeString(getHeaderStart());
00636   BeginObject(tag, getHeaderAtts());
00637 
00638   // Fake a dummy parent
00639   numParents += 2;
00640 }
00641 
00642 
00643 DECLARE_EXPORT bool XMLElement::getBool() const
00644 {
00645   switch (getData()[0])
00646   {
00647     case 'T':
00648     case 't':
00649     case '1':
00650       return true;
00651     case 'F':
00652     case 'f':
00653     case '0':
00654       return false;
00655   }
00656   throw DataException("Invalid boolean value: " + string(getData()));
00657 }
00658 
00659 
00660 DECLARE_EXPORT const char* Attribute::getName() const
00661 {
00662   if (ch) return ch;
00663   Keyword::tagtable::const_iterator i = Keyword::getTags().find(hash);
00664   if (i == Keyword::getTags().end())
00665     throw LogicException("Undefined element keyword");
00666   return i->second->getName().c_str();
00667 }
00668 
00669 
00670 DECLARE_EXPORT Keyword::Keyword(const string& name) : strName(name)
00671 {
00672   // Error condition: name is empty
00673   if (name.empty()) throw LogicException("Creating keyword without name");
00674 
00675   // Create a number of variations of the tag name
00676   strStartElement = string("<") + name;
00677   strEndElement = string("</") + name + ">\n";
00678   strElement = string("<") + name + ">";
00679   strAttribute = string(" ") + name + "=\"";
00680 
00681   // Compute the hash value
00682   dw = hash(name.c_str());
00683 
00684   // Create a properly encoded Xerces string
00685   xercesc::XMLPlatformUtils::Initialize();
00686   xmlname = xercesc::XMLString::transcode(name.c_str());
00687 
00688   // Verify that the hash is "perfect".
00689   check();
00690 }
00691 
00692 
00693 DECLARE_EXPORT Keyword::Keyword(const string& name, const string& nspace)
00694   : strName(name)
00695 {
00696   // Error condition: name is empty
00697   if (name.empty())
00698     throw LogicException("Creating keyword without name");
00699   if (nspace.empty())
00700     throw LogicException("Creating keyword with empty namespace");
00701 
00702   // Create a number of variations of the tag name
00703   strStartElement = string("<") + nspace + ":" + name;
00704   strEndElement = string("</") + nspace + ":" + name + ">\n";
00705   strElement = string("<") + nspace + ":" + name + ">";
00706   strAttribute = string(" ") + nspace + ":" + name + "=\"";
00707 
00708   // Compute the hash value
00709   dw = hash(name);
00710 
00711   // Create a properly encoded Xerces string
00712   xercesc::XMLPlatformUtils::Initialize();
00713   xmlname = xercesc::XMLString::transcode(string(nspace + ":" + name).c_str());
00714 
00715   // Verify that the hash is "perfect".
00716   check();
00717 }
00718 
00719 
00720 void Keyword::check()
00721 {
00722   // To be thread-safe we make sure only a single thread at a time
00723   // can execute this check.
00724   static Mutex dd;
00725   {
00726     ScopeMutexLock l(dd);
00727     tagtable::const_iterator i = getTags().find(dw);
00728     if (i!=getTags().end() && i->second->getName()!=strName)
00729       throw LogicException("Tag XML-tag hash function clashes for "
00730           + i->second->getName() + " and " + strName);
00731     getTags().insert(make_pair(dw,this));
00732   }
00733 }
00734 
00735 
00736 DECLARE_EXPORT Keyword::~Keyword()
00737 {
00738   // Remove from the tag list
00739   tagtable::iterator i = getTags().find(dw);
00740   if (i!=getTags().end()) getTags().erase(i);
00741 
00742   // Destroy the xerces string
00743   xercesc::XMLString::release(&xmlname);
00744   xercesc::XMLPlatformUtils::Terminate();
00745 }
00746 
00747 
00748 DECLARE_EXPORT const Keyword& Keyword::find(const char* name)
00749 {
00750   tagtable::const_iterator i = getTags().find(hash(name));
00751   return *(i!=getTags().end() ? i->second : new Keyword(name));
00752 }
00753 
00754 
00755 DECLARE_EXPORT Keyword::tagtable& Keyword::getTags()
00756 {
00757   static tagtable alltags;
00758   return alltags;
00759 }
00760 
00761 
00762 DECLARE_EXPORT hashtype Keyword::hash(const char* c)
00763 {
00764   if (c == 0 || *c == 0) return 0;
00765 
00766   // Compute hash
00767   const char* curCh = c;
00768   hashtype hashVal = *curCh++;
00769   while (*curCh)
00770     hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++;
00771 
00772   // Divide by modulus
00773   return hashVal % 954991;
00774 }
00775 
00776 
00777 DECLARE_EXPORT hashtype Keyword::hash(const XMLCh* t)
00778 {
00779   char* c = xercesc::XMLString::transcode(t);
00780   if (c == 0 || *c == 0)
00781   {
00782     xercesc::XMLString::release(&c);
00783     return 0;
00784   }
00785 
00786   // Compute hash
00787   const char* curCh = c;
00788   hashtype hashVal = *curCh++;
00789   while (*curCh)
00790     hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++;
00791 
00792   // Divide by modulus
00793   xercesc::XMLString::release(&c);
00794   return hashVal % 954991;
00795 }
00796 
00797 
00798 DECLARE_EXPORT void Keyword::printTags()
00799 {
00800   for (tagtable::iterator i = getTags().begin(); i != getTags().end(); ++i)
00801     logger << i->second->getName() << "   " << i->second->dw << endl;
00802 }
00803 
00804 
00805 void XMLInputFile::parse(Object *pRoot, bool validate)
00806 {
00807   // Check if string has been set
00808   if (filename.empty())
00809     throw DataException("Missing input file or directory");
00810 
00811   // Check if the parameter is the name of a directory
00812   struct stat stat_p;
00813   if (stat(filename.c_str(), &stat_p))
00814     // Can't verify the status
00815     throw RuntimeException("Couldn't open input file '" + filename + "'");
00816   else if (stat_p.st_mode & S_IFDIR)
00817   {
00818     // Data is a directory: loop through all *.xml files now. No recursion in
00819     // subdirectories is done.
00820     // The code is unfortunately different for Windows & Linux. Sigh...
00821 #ifdef _MSC_VER
00822     string f = filename + "\\*.xml";
00823     WIN32_FIND_DATA dir_entry_p;
00824     HANDLE h = FindFirstFile(f.c_str(), &dir_entry_p);
00825     if (h == INVALID_HANDLE_VALUE)
00826       throw RuntimeException("Couldn't open input file '" + f + "'");
00827     do
00828     {
00829       f = filename + '/' + dir_entry_p.cFileName;
00830       XMLInputFile(f.c_str()).parse(pRoot);
00831     }
00832     while (FindNextFile(h, &dir_entry_p));
00833     FindClose(h);
00834 #elif HAVE_DIRENT_H
00835     struct dirent *dir_entry_p;
00836     DIR *dir_p = opendir(filename.c_str());
00837     while (NULL != (dir_entry_p = readdir(dir_p)))
00838     {
00839       int n = NAMLEN(dir_entry_p);
00840       if (n > 4 && !strcmp(".xml", dir_entry_p->d_name + n - 4))
00841       {
00842         string f = filename + '/' + dir_entry_p->d_name;
00843         XMLInputFile(f.c_str()).parse(pRoot, validate);
00844       }
00845     }
00846     closedir(dir_p);
00847 #else
00848     throw RuntimeException("Can't process a directory on your platform");
00849 #endif
00850   }
00851   else
00852   {
00853     // Normal file
00854     // Parse the file
00855     XMLCh *f = xercesc::XMLString::transcode(filename.c_str());
00856     xercesc::LocalFileInputSource in(f);
00857     xercesc::XMLString::release(&f);
00858     XMLInput::parse(in, pRoot, validate);
00859   }
00860 }
00861 
00862 } // end namespace
00863 } // end namespace

Documentation generated for frePPLe by  doxygen