XmlDoc.cpp

Go to the documentation of this file.
00001 /* 
00002  * wsdlpull - A C++ parser  for WSDL  (Web services description language)
00003  * XmlNode_t and XmlDoc_t for the WsdlParser
00004  * Copyright (C) 2009 Daniel Rodriguez
00005  *
00006  * This library is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Library General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2 of the License, or (at your option) any later version.
00010  *
00011  * This library is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Library General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Library General Public
00017  * License along with this library; if not, write to the Free
00018  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00019  */
00020 
00021 #include "xmlpull/XmlDoc.h"
00022 
00023 
00024 
00026   // XmlNode_t
00028 
00029   XmlNode_t::XmlNode_t( const std::string &p_name, size_t p_depth)
00030   {
00031     m_name = p_name;
00032     m_depth = p_depth;
00033 
00034     m_empty = p_name.empty();
00035 
00036     mp_parent = NULL;
00037     mp_prev = NULL;
00038     mp_next = NULL;
00039   }
00040 
00041   XmlNode_t::XmlNode_t( const XmlNode_t &p_xmlNode)
00042   {
00043     *this = p_xmlNode;
00044   }
00045 
00046   XmlNode_t &
00047   XmlNode_t::operator =( const XmlNode_t &p_xmlNode)
00048   {
00049     m_name = p_xmlNode.m_name;
00050     m_text = p_xmlNode.m_text;
00051     m_depth = p_xmlNode.m_depth;
00052 
00053     m_empty = p_xmlNode.m_empty;
00054 
00055     mp_parent = NULL;
00056     mp_prev = NULL;
00057     mp_next = NULL;
00058 
00059     m_attributes = p_xmlNode.m_attributes;
00060     m_mapAttributes = p_xmlNode.m_mapAttributes;
00061 
00062     m_mapNodes = p_xmlNode.m_mapNodes;
00063 
00064     this->deallocateNodes();
00065 
00066     for( size_t l_i = 0; l_i < p_xmlNode.m_nodes.size(); l_i++) {
00067 
00068       XmlNode_t *l_tmpNode = new XmlNode_t( *( p_xmlNode.m_nodes[ l_i]));
00069 
00070       l_tmpNode->setParent( this);
00071 
00072       if( l_i > 0) {
00073         l_tmpNode->setPrev( m_nodes.back());
00074         m_nodes.back()->setNext( l_tmpNode);
00075       }
00076       m_nodes.push_back( l_tmpNode);
00077     }
00078 
00079     return *this;
00080   }
00081 
00082   XmlNode_t::~XmlNode_t()
00083   {
00084     this->deallocateNodes();
00085   }
00086 
00087   void
00088   XmlNode_t::deallocateNodes( void)
00089   {
00090     for( size_t l_i = 0; l_i < m_nodes.size(); l_i++)
00091       delete m_nodes.at( l_i);
00092     m_nodes.clear();
00093   }
00094 
00095   void
00096   XmlNode_t::clear( void)
00097   {
00098     m_name.clear();
00099     m_text.clear();
00100 
00101     m_depth = 1;
00102 
00103     m_empty = true;
00104 
00105     mp_parent = NULL;
00106     mp_prev = NULL;
00107     mp_next = NULL;
00108 
00109     m_attributes.clear();
00110     m_mapAttributes.clear();
00111 
00112     this->deallocateNodes();
00113     m_mapNodes.clear();
00114   }
00115 
00116   XmlNode_t &
00117   XmlNode_t::getParent( void) const
00118   {
00119     return *mp_parent;
00120   }
00121 
00122   void
00123   XmlNode_t::setParent( XmlNode_t &p_parent)
00124   {
00125     mp_parent = &p_parent;
00126   }
00127 
00128   void
00129   XmlNode_t::setParent( XmlNode_t *p_parent)
00130   {
00131     mp_parent = p_parent;
00132   }
00133 
00134   XmlNode_t &
00135   XmlNode_t::getPrev( void) const
00136   {
00137     return *mp_prev;
00138   }
00139 
00140   void
00141   XmlNode_t::setPrev( XmlNode_t &p_prev)
00142   {
00143     mp_prev = &p_prev;
00144   }
00145 
00146   void
00147   XmlNode_t::setPrev( XmlNode_t *p_prev)
00148   {
00149     mp_prev = p_prev;
00150   }
00151 
00152   XmlNode_t &
00153   XmlNode_t::getNext( void) const
00154   {
00155     return *mp_next;
00156   }
00157 
00158   void
00159   XmlNode_t::setNext( XmlNode_t &p_next)
00160   {
00161     mp_next = &p_next;
00162   }
00163 
00164   void
00165   XmlNode_t::setNext( XmlNode_t *p_next)
00166   {
00167     mp_next = p_next;
00168   }
00169 
00170   const std::string &
00171   XmlNode_t::getName( void) const
00172   {
00173     return m_name;
00174   }
00175 
00176   void
00177   XmlNode_t::setName( const std::string &p_name, bool p_empty)
00178   {
00179     m_name = p_name;
00180     m_empty = p_empty;
00181   }
00182 
00183   const std::string &
00184   XmlNode_t::getText( void) const
00185   {
00186     return m_text;
00187   }
00188 
00189   size_t
00190   XmlNode_t::getDepth( void) const
00191   {
00192     return m_depth;
00193   }
00194 
00195   void
00196   XmlNode_t::setDepth( size_t p_depth)
00197   {
00198     m_depth = p_depth;
00199   }
00200 
00201   bool
00202   XmlNode_t::isRootNode( void) const
00203   {
00204     return mp_parent == NULL;
00205   }
00206 
00207   bool
00208   XmlNode_t::isTextNode( void) const
00209   {
00210     return m_nodes.empty();
00211   }
00212 
00213   XmlNode_t &
00214   XmlNode_t::addNode( const std::string &p_name, bool p_empty)
00215   {
00216     XmlNode_t *l_xmlNode = new XmlNode_t( p_name);
00217     l_xmlNode->setEmpty( p_empty);
00218     return this->addNode( l_xmlNode);
00219   }
00220     
00221   XmlNode_t &
00222   XmlNode_t::addNode( XmlNode_t *p_xmlNode)
00223   {
00224 
00225     m_text.clear();
00226 
00227     XmlNode_t *l_xmlNode = p_xmlNode;
00228 
00229     if( l_xmlNode == NULL)
00230       l_xmlNode = new XmlNode_t();
00231 
00232     l_xmlNode->setParent( this);
00233 
00234     if( m_nodes.empty() == false) {
00235 
00236       l_xmlNode->setPrev( m_nodes.back());
00237       m_nodes.back()->setNext( l_xmlNode);
00238 
00239     }
00240 
00241     l_xmlNode->setDepth( m_depth + 1);
00242     m_nodes.push_back( l_xmlNode);
00243 
00244     m_mapNodes.insert( std::make_pair( l_xmlNode->getName(), m_nodes.size() - 1));
00245 
00246     return *l_xmlNode;
00247   }
00248   
00249   void
00250   XmlNode_t::addAttribute( const std::string &p_name, const std::string &p_value)
00251   {
00252     m_attributes.push_back( std::make_pair( p_name, p_value));
00253 
00254     m_mapAttributes.insert( std::make_pair( p_name, m_attributes.size() - 1));
00255   }
00256 
00257   bool
00258   XmlNode_t::getAttribute( const std::string &p_name, std::string &p_result) const
00259   {
00260     MapAttributes_t::const_iterator l_it = m_mapAttributes.find( p_name);
00261     if( l_it == m_mapAttributes.end())
00262       return false;
00263     size_t l_index = (*l_it).second;
00264     p_result = m_attributes[ l_index].second;
00265     return true;
00266   }
00267 
00268   void
00269   XmlNode_t::setText( const std::string &p_text)
00270   {
00271     m_nodes.clear();
00272 
00273     m_text = p_text;
00274   }
00275 
00276   void
00277   XmlNode_t::setEmpty( bool p_empty)
00278   {
00279     m_empty = p_empty;
00280   }
00281 
00282   bool
00283   XmlNode_t::empty( void) const
00284   {
00285     return m_empty;
00286   }
00287 
00288   XmlNode_t *
00289   XmlNode_t::getNode( const std::string &p_name, size_t p_index) const
00290   {
00291 
00292     std::pair<MultiMapNodes_t::const_iterator, MultiMapNodes_t::const_iterator> l_itRange;
00293     l_itRange = m_mapNodes.equal_range( p_name);
00294 
00295     MultiMapNodes_t::const_iterator &l_itFirst = l_itRange.first;
00296     MultiMapNodes_t::const_iterator &l_itLast = l_itRange.second;
00297 
00298     XmlNode_t *l_resultNode = NULL;
00299 
00300     size_t l_index = 0;
00301 
00302     while( l_itFirst != l_itLast) {
00303 
00304       if( l_index == p_index) {
00305 
00306         l_resultNode = m_nodes[ (*l_itFirst).second];
00307         break;
00308       }
00309       ++l_index;
00310       ++l_itFirst;
00311     }
00312 
00313     return l_resultNode;
00314   }
00315 
00316   void
00317   XmlNode_t::getAllChildren( XmlNode_t::VectorNodePtrs_t &p_children)
00318   {
00319     p_children = m_nodes;
00320   }
00321 
00322   void
00323   XmlNode_t::findDirectChildren( const std::string &p_name, XmlNode_t::VectorNodePtrs_t &p_children)
00324   {
00325     for( size_t l_i = 0; l_i < m_nodes.size(); l_i++){
00326 
00327       if( p_name == m_nodes[ l_i]->getName())
00328         p_children.push_back( m_nodes[ l_i]);
00329     }
00330   }
00331 
00332   void
00333   XmlNode_t::findSelfOrChildren( const std::string &p_name, XmlNode_t::VectorNodePtrs_t &p_children, bool p_lazyRelativeMatch)
00334   {
00335 
00336     if( m_name == p_name) {
00337       p_children.push_back( this);
00338       return;
00339     }
00340 
00341     if( p_lazyRelativeMatch == true) {
00342 
00343       for( size_t l_i = 0; l_i < m_nodes.size(); l_i++){
00344 
00345         m_nodes[ l_i]->findSelfOrChildren( p_name, p_children, p_lazyRelativeMatch);
00346 
00347       }
00348     }
00349   }
00350 
00351   void
00352   XmlNode_t::findAny( const std::string &p_name, XmlNode_t::VectorNodePtrs_t &p_children)
00353   {
00354     if( m_name == p_name)
00355       p_children.push_back( this);
00356 
00357     for( size_t l_i = 0; l_i < m_nodes.size(); l_i++){
00358 
00359       m_nodes[ l_i]->findAny( p_name, p_children);
00360 
00361     }
00362   }
00363 
00364   bool
00365   XmlNode_t::operator ==( const XmlNode_t &p_xmlNode) const
00366   {
00367     return m_name == p_xmlNode.m_name;
00368   }
00369 
00370   std::ostream &
00371   operator <<( std::ostream &p_ostream, const XmlNode_t &p_xmlNode)
00372   {
00373     p_ostream << std::string( ( p_xmlNode.m_depth - 1) * XmlNode_t::WS_AMOUNT, ' ');
00374 
00375     // Output Start tag
00376     p_ostream << "<" << p_xmlNode.m_name;
00377 
00378     // Attributes
00379     for( size_t l_i = 0; l_i < p_xmlNode.m_attributes.size(); l_i++) {
00380       p_ostream << " "
00381                 << p_xmlNode.m_attributes[ l_i].first
00382                 << "=\"" << p_xmlNode.m_attributes[ l_i].second
00383                 << "\"";
00384     }
00385     // Close Start tag
00386     p_ostream << ">";
00387 
00388     // Output Text or child nodes
00389     if( p_xmlNode.isTextNode() == true) {
00390       // Output Text
00391       p_ostream << p_xmlNode.m_text;
00392     } else {
00393       p_ostream << std::endl;
00394       for( size_t l_i = 0; l_i < p_xmlNode.m_nodes.size(); l_i++) {
00395         // Output child nodes
00396         p_ostream << *( p_xmlNode.m_nodes[ l_i]);
00397       }
00398     }
00399 
00400     if( p_xmlNode.isTextNode() == false)
00401       p_ostream << std::string( ( p_xmlNode.m_depth - 1)* XmlNode_t::WS_AMOUNT, ' ');
00402 
00403     // Output End tag
00404     p_ostream << "</" << p_xmlNode.m_name << ">" << std::endl;
00405 
00406     return p_ostream;
00407   }
00408 
00410   // XmlDoc_t
00412 
00413   XmlDoc_t::XmlDoc_t( const XmlNode_t &p_xmlNode)
00414   {
00415     m_rootNode = p_xmlNode;
00416 
00417     m_processEnvAndBody = false;
00418     m_lazyRelativeMatch = true;
00419   }
00420 
00421   void
00422   XmlDoc_t::clear( void)
00423   {
00424     // m_version.clear();
00425     // m_encoding.clear();
00426     m_rootNode.clear();
00427   }
00428 
00429   void
00430   XmlDoc_t::setProcessEnvAndBody( bool p_processEnvAndBody)
00431   {
00432     m_processEnvAndBody = p_processEnvAndBody;
00433   }
00434 
00435   bool
00436   XmlDoc_t::getProcessEnvAndBody( void) const
00437   {
00438     return m_processEnvAndBody;
00439   }
00440 
00441   void
00442   XmlDoc_t::setLazyRelativeMatch( bool p_lazyRelativeMatch)
00443   {
00444     m_lazyRelativeMatch = p_lazyRelativeMatch;
00445   }
00446 
00447   bool
00448   XmlDoc_t::getLazyRelativeMatch( void) const
00449   {
00450     return m_lazyRelativeMatch;
00451   }
00452 
00453   XmlNode_t &
00454   XmlDoc_t::setRootNode( const XmlNode_t &p_xmlNode)
00455   {
00456     m_rootNode = p_xmlNode;
00457     return m_rootNode;
00458   }
00459 
00460   XmlNode_t &
00461   XmlDoc_t::getRootNode( void)
00462   {
00463     return m_rootNode;
00464   }
00465 
00466   const XmlNode_t &
00467   XmlDoc_t::getRootNode( void) const
00468   {
00469     return m_rootNode;
00470   }
00471 
00472   bool
00473   XmlDoc_t::xpath( const std::string &p_xpath, std::vector< std::string> &p_results, size_t p_index)
00474   {
00475 
00476     std::vector< XmlNode_t *> l_nodeSet[ 2];
00477     size_t l_curSetIndex = 0;
00478 
00479     // Seed Initial Set of nodes: either skip /Envelope/Body
00480     // or be it envelope
00481     if( m_processEnvAndBody == true) {
00482       l_nodeSet[ l_curSetIndex].push_back( &m_rootNode);
00483     } else {
00484 
00485       // /Envelope is the root node, no need to look for it
00486       // Get his first child: "Body"
00487       XmlNode_t *l_tmpNode = m_rootNode.getNode( "Body");
00488 
00489       // This shouldn't happen, as there /Envelope/Body
00490       // should always be there
00491       if( l_tmpNode == NULL)
00492         return false;
00493 
00494       // Insert all children under /Envelope/Body
00495       l_tmpNode->getAllChildren( l_nodeSet[ l_curSetIndex]);
00496 
00497       // If no children ... bail out
00498       if( l_nodeSet[ l_curSetIndex].empty() == true)
00499         return false;
00500     }
00501 
00502     std::string l_name, l_xpath;
00503     std::string::size_type l_slashPos = 0;
00504     std::string::size_type l_nonSlashPos = 0;
00505     size_t l_matchCounter = 0;
00506     bool l_matchAny = false;
00507     bool l_matchAttribute = false;
00508     bool l_lazyRelativeMatch = false;
00509 
00510     // Check if root match is sought
00511     if( p_xpath.find( "/") == 0 && p_xpath.find( "//") != 0) {
00512 
00513       // Find the name token
00514       l_slashPos = p_xpath.find( "/", 1);
00515       l_name = p_xpath.substr( 1, l_slashPos - 1);
00516 
00517       // Check the already seeded set
00518       for( size_t l_i = 0; l_i < l_nodeSet[ l_curSetIndex].size(); l_i++) {
00519 
00520         XmlNode_t *l_tmpNode = l_nodeSet[ l_curSetIndex][ l_i];
00521 
00522         // If a name match is found, seed the alternate set
00523         if( l_name == l_tmpNode->getName())
00524           l_nodeSet[ !l_curSetIndex].push_back( l_tmpNode);
00525       }
00526 
00527       // Switch the main and alternate sets
00528       l_curSetIndex = !l_curSetIndex;
00529 
00530       // If root match was expected but the set is empty
00531       // we may safely return false
00532       if( l_nodeSet[ l_curSetIndex].empty() == true)
00533         return false;
00534 
00535       // Else, indicate that a match was found. This disables "lazyEvaluationMatch"
00536       ++l_matchCounter;
00537     }
00538 
00539     // Record the xpath expression before entering the main search loop
00540     l_xpath = p_xpath;
00541 
00542     // Do exit when we reach the End of String
00543     while( l_slashPos != std::string::npos) {
00544 
00545       // Until we see a double slash, we will not match any single node
00546       l_matchAny = false;
00547       // The local lazyRelativeMatch is false and can only once be true
00548       // During the first match and when the global lazyRelativeMatch is set
00549       l_lazyRelativeMatch = false;
00550 
00551       // Reduce the xpath expression to purge the already consumed tokens
00552       l_xpath = l_xpath.substr( l_slashPos);
00553 
00554       // If no further input is available ... break away
00555       if( l_xpath.empty() == true)
00556         break;
00557 
00558       l_slashPos = l_xpath.find( "/");
00559 
00560       if( l_slashPos == 0) {
00561 
00562         // Slash found at the beginning of the string
00563 
00564         // Check for a "matchAny" doubleslash
00565         if( l_xpath.find( "//") == 0)
00566           l_matchAny = true;
00567 
00568         // Locate the start of the token after the slash(es)
00569         l_nonSlashPos = l_xpath.find_first_not_of( "/");
00570 
00571         // Check if there are any characters left
00572         // This could well be a trailing slash
00573         if( l_nonSlashPos == std::string::npos)
00574           break;
00575 
00576         // Locate next slash after the token
00577         l_slashPos = l_xpath.find( "/", l_nonSlashPos);
00578 
00579         // Retrieve the token
00580         l_name = l_xpath.substr( l_nonSlashPos, l_slashPos - l_nonSlashPos);
00581 
00582       } else {
00583 
00584         // Next slash is somewhere in the middle of the xpath expression
00585         // or there are no further slashes ( l_slashPos == npos)
00586         // in any case the substr is from 0 to l_slashPos
00587         l_name = l_xpath.substr( 0, l_slashPos);
00588 
00589         // If no match has been done, then allow lazyRelativeMatch
00590         // to act (if so configured)
00591         if( l_matchCounter == 0 && m_lazyRelativeMatch == true)
00592           l_lazyRelativeMatch = true;
00593       }
00594 
00595       // Indicate that a match happened (to avoid lazyRelativeMatch)
00596       ++l_matchCounter;
00597 
00598       // If no token is found ... bail out
00599       if( l_name.empty() == true)
00600         break;
00601 
00602       // Check if attribute values are sought
00603       if( l_name[ 0] == '@') {
00604 
00605         // Mark the fact that an attribut is sought
00606         l_matchAttribute = true;
00607         // Purge the "@" from the token
00608         l_name = l_name.substr( 1);
00609 
00610         // break away, since the previous token has been a match
00611         // and attributes are not matched in the xml tree
00612         // but rather extracted from the last set of matched nodes
00613         break;
00614       }
00615 
00616       // Clear the alternate set, as we are in a loop
00617       l_nodeSet[ !l_curSetIndex].clear();
00618 
00619       for( size_t l_i = 0; l_i < l_nodeSet[ l_curSetIndex].size(); l_i++) {
00620 
00621         XmlNode_t *l_tmpNode = l_nodeSet[ l_curSetIndex][ l_i];
00622 
00623         // Match any node according to "name" if sought
00624         if( l_matchAny == true)
00625           l_tmpNode->findAny( l_name, l_nodeSet[ !l_curSetIndex]);
00626         // if lazyRelative is acting, go down the tree looking for the first match of "name"
00627         else if( l_lazyRelativeMatch == true)
00628           l_tmpNode->findSelfOrChildren( l_name, l_nodeSet[ !l_curSetIndex], l_lazyRelativeMatch);
00629         // in any other case, see if any direct children matches "name"
00630         else
00631           l_tmpNode->findDirectChildren( l_name, l_nodeSet[ !l_curSetIndex]);
00632       }
00633 
00634       // Swap the sets
00635       l_curSetIndex = !l_curSetIndex;
00636 
00637       // If no node is selected ... exit happily
00638       if( l_nodeSet[ l_curSetIndex].empty() == true)
00639         return false;
00640     }
00641 
00642     // A nodeSet should be in place, so we should be able to fill the results
00643 
00644     for( size_t l_i = 0; l_i < l_nodeSet[ l_curSetIndex].size(); l_i++) {
00645 
00646       XmlNode_t *l_tmpNode = l_nodeSet[ l_curSetIndex][ l_i];
00647 
00648       // According to W3C standards, nodes start at "1"
00649       // So we use p_index == 0 to mark that we take all nodes
00650       // If p_index != 0, then we will only return one result
00651       if( p_index == 0 || ( l_i == ( p_index - 1))) {
00652       
00653         if( l_matchAttribute == false) {
00654           // Fill results with text from the node
00655           p_results.push_back( l_tmpNode->getText());
00656         } else {
00657           // Extract the attribute and add it to the results
00658           std::string l_tmpAttr;
00659           if( l_tmpNode->getAttribute( l_name, l_tmpAttr) == true)
00660             p_results.push_back( l_tmpAttr);
00661         }
00662       }
00663     }
00664 
00665     // Tell the world if any results is being delivered back
00666     return ( p_results.empty() == false);
00667   }
00668 
00669   std::ostream &
00670   operator <<( std::ostream &p_ostream, const XmlDoc_t &p_xmlDoc)
00671   {
00672     // p_ostream << "<?xml version=\"" << m_version << "\" encoding=\"" << m_encoding << "\"" << std::endl;
00673     p_ostream << p_xmlDoc.m_rootNode;
00674 
00675     return p_ostream;
00676   }
00677 
00678 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated by  doxygen 1.6.2