bes  Updated for version 3.20.6
SaxParserWrapper.cc
1 // This file is part of the "NcML Module" project, a BES module designed
// to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
// Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
#include "SaxParserWrapper.h"

#include <cstdarg> // for va_list, va_start, va_end
#include <cstdio> // for vsnprintf
#include <exception>
#include <iostream>
#include <string>

#include <libxml/parser.h>
#include <libxml/xmlstring.h>

#include "BESDebug.h"
#include "BESError.h"
#include "BESInternalError.h"
#include "BESInternalFatalError.h"
#include "BESSyntaxUserError.h"
#include "BESForbiddenError.h"
#include "BESNotFoundError.h"
#include "NCMLDebug.h"
#include "SaxParser.h"
#include "XMLHelpers.h"
49 
// Toggle that tells the parser to use the SAX2 start/end element
// callbacks, which carry namespace information.
// [ TODO We probably want to remove the non-namespace pathways at some point,
// but they are left here for now in case there are issues. ]
54 #define NCML_PARSER_USE_SAX2_NAMESPACES 1
55 
56 using namespace std;
57 using namespace ncml_module;
58 
60 // Helpers
61 
62 #if NCML_PARSER_USE_SAX2_NAMESPACES
63 static const int SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE = 5;
64 static int toXMLAttributeMapWithNamespaces(XMLAttributeMap& attrMap, const xmlChar** attributes, int num_attributes)
65 {
66  attrMap.clear();
67  for (int i = 0; i < num_attributes; ++i) {
68  XMLAttribute attr;
69  attr.fromSAX2NamespaceAttributes(attributes);
70  attributes += SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE; // jump to start of next record
71  attrMap.addAttribute(attr);
72  }
73  return num_attributes;
74 }
75 #else
76 // Assumes the non-namespace calls, so attrs is stride 2 {name,value}
77 static int toXMLAttributeMapNoNamespaces(XMLAttributeMap& attrMap, const xmlChar** attrs)
78 {
79  attrMap.clear();
80  int count=0;
81  while (attrs && *attrs != NULL)
82  {
83  XMLAttribute attr;
84  attr.localname = XMLUtil::xmlCharToString(*attrs);
85  attr.value = XMLUtil::xmlCharToString(*(attrs+1));
86  attrMap.addAttribute(attr);
87  attrs += 2;
88  count++;
89  }
90  return count;
91 }
92 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
93 
// Callbacks we register with libxml; they just pass the call on to our C++ engine.
//
// NOTE WELL: New C handlers must follow the pattern of the
// existing handlers below in order to avoid memory leaks
// in libxml when an exception is thrown!

// To avoid cut & paste below, we use a pair of macros to cast the void* into the wrapper and
// set up a proper error handling structure around the main call.
// The opening macro defines the symbol "parser" as the SaxParser contained in the wrapper.
// So for example, a safe handler call to SaxParser would look like:
// static void ncmlStartDocument(void* userData)
//{
// BEGIN_SAFE_PARSER_BLOCK(userData) // pass in the void*, which is a SaxParserWrapper*
// parser.onStartDocument(); // call the dispatch on the wrapped parser using the autodefined name parser
// END_SAFE_PARSER_BLOCK // end the error handling wrapper
//}
111 
// Opens the guarded region of a SAX callback. argName is the void* user data,
// which is cast to the SaxParserWrapper. If the wrapper is already in the
// exception state the callback returns immediately; otherwise a try block is
// opened, "parser" is bound to the wrapped SaxParser and it is given the
// wrapper's current parse line. Must be closed with END_SAFE_PARSER_BLOCK.
#define BEGIN_SAFE_PARSER_BLOCK(argName) { \
    SaxParserWrapper* _spw_ = static_cast<SaxParserWrapper*>(argName); \
    if (_spw_->isExceptionState()) { \
        return; \
    } \
    try { \
        SaxParser& parser = _spw_->getParser(); \
        parser.setParseLineNumber(_spw_->getCurrentParseLine());

// Closes the region opened by BEGIN_SAFE_PARSER_BLOCK. Any exception raised
// inside the region is handed to the wrapper's deferException() instead of
// being allowed to leave the callback: BESError as-is, anything else wrapped
// in a BESInternalError.
#define END_SAFE_PARSER_BLOCK \
    } \
    catch (BESError& theErr) { \
        BESDEBUG("ncml", "Caught BESError&, deferring..." << endl); \
        _spw_->deferException(theErr); \
    } \
    catch (std::exception& ex) { \
        BESDEBUG("ncml", "Caught std::exception&, wrapping and deferring..." << endl); \
        BESInternalError _wrapped_("Wrapped std::exception.what()=" + string(ex.what()), __FILE__, __LINE__); \
        _spw_->deferException(_wrapped_); \
    } \
    catch (...) { \
        BESDEBUG("ncml", "Caught unknown (...) exception: deferring default error." << endl); \
        BESInternalError _wrapped_("SaxParserWrapper:: Unknown Exception Type: ", __FILE__, __LINE__); \
        _spw_->deferException(_wrapped_); \
    } \
}
146 
148 // Our C SAX callbacks, wrapped carefully.
149 
150 static void ncmlStartDocument(void* userData)
151 {
152  BEGIN_SAFE_PARSER_BLOCK(userData)
153 
154  parser.onStartDocument();
155 
156  END_SAFE_PARSER_BLOCK
157 }
158 
159 static void ncmlEndDocument(void* userData)
160 {
161  BEGIN_SAFE_PARSER_BLOCK(userData)
162 
163  parser.onEndDocument();
164 
165  END_SAFE_PARSER_BLOCK
166 }
167 
168 #if !NCML_PARSER_USE_SAX2_NAMESPACES
169 
170 static void ncmlStartElement(void * userData,
171  const xmlChar * name,
172  const xmlChar ** attrs)
173 {
174  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
175  BEGIN_SAFE_PARSER_BLOCK(1)
176 
177  string nameS = XMLUtil::xmlCharToString(name);
178  XMLAttributeMap map;
179  toXMLAttributeMapNoNamespaces(map, attrs);
180 
181  // These args will be valid for the scope of the call.
182  parser.onStartElement(nameS, map);
183 
184  END_SAFE_PARSER_BLOCK
185 }
186 
187 static void ncmlEndElement(void * userData,
188  const xmlChar * name)
189 {
190  BEGIN_SAFE_PARSER_BLOCK(1)
191 
192  string nameS = XMLUtil::xmlCharToString(name);
193  parser.onEndElement(nameS);
194 
195  END_SAFE_PARSER_BLOCK
196 }
197 #endif // !NCML_PARSER_USE_SAX2_NAMESPACES
198 
199 #if NCML_PARSER_USE_SAX2_NAMESPACES
200 static
201 void ncmlSax2StartElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
202  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /* nb_defaulted */,
203  const xmlChar **attributes)
204 {
205  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
206  BEGIN_SAFE_PARSER_BLOCK(userData)
207 
208  BESDEBUG("ncml", "SaxParserWrapper::ncmlSax2StartElementNs() - localname:" << localname << endl);
209 
210  XMLAttributeMap attrMap;
211  toXMLAttributeMapWithNamespaces(attrMap, attributes, nb_attributes);
212 
213  XMLNamespaceMap nsMap;
214  nsMap.fromSAX2Namespaces(namespaces, nb_namespaces);
215 
216  // These args will be valid for the scope of the call.
217  string localnameString = XMLUtil::xmlCharToString(localname);
218  string prefixString = XMLUtil::xmlCharToString(prefix);
219  string uriString = XMLUtil::xmlCharToString(URI);
220 
221  parser.onStartElementWithNamespace(
222  localnameString,
223  prefixString,
224  uriString,
225  attrMap,
226  nsMap);
227 
228  END_SAFE_PARSER_BLOCK
229 }
230 
231 static
232 void ncmlSax2EndElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)
233 {
234  BEGIN_SAFE_PARSER_BLOCK(userData)
235 
236  string localnameString = XMLUtil::xmlCharToString(localname);
237  string prefixString = XMLUtil::xmlCharToString(prefix);
238  string uriString = XMLUtil::xmlCharToString(URI);
239  parser.onEndElementWithNamespace(localnameString, prefixString, uriString);
240 
241  END_SAFE_PARSER_BLOCK
242 }
243 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
244 
245 static void ncmlCharacters(void* userData, const xmlChar* content, int len)
246 {
247  BEGIN_SAFE_PARSER_BLOCK(userData)
248 
249  // len is since the content string might not be null terminated,
250  // so we have to build out own and pass it up special....
251  // TODO consider just using these xmlChar's upstairs to avoid copies, or make an adapter or something.
252  string characters("");
253  characters.reserve(len);
254  const xmlChar* contentEnd = content+len;
255  while(content != contentEnd)
256  {
257  characters += (const char)(*content++);
258  }
259 
260  parser.onCharacters(characters);
261 
262  END_SAFE_PARSER_BLOCK
263 }
264 
265 static void ncmlWarning(void* userData, const char* msg, ...)
266 {
267  BEGIN_SAFE_PARSER_BLOCK(userData)
268 
269  BESDEBUG("ncml", "SaxParserWrapper::ncmlWarning() - msg:" << msg << endl);
270 
271  char buffer[1024];
272  va_list(args);
273  va_start(args, msg);
274  unsigned int len = sizeof(buffer);
275  vsnprintf(buffer, len, msg, args);
276  va_end(args);
277  parser.onParseWarning(string(buffer));
278 
279  END_SAFE_PARSER_BLOCK
280 }
281 
282 static void ncmlFatalError(void* userData, const char* msg, ...)
283 {
284  BEGIN_SAFE_PARSER_BLOCK(userData)
285 
286  BESDEBUG("ncml", "SaxParserWrapper::ncmlFatalError() - msg:" << msg << endl);
287 
288  char buffer[1024];
289  va_list(args);
290  va_start(args, msg);
291  unsigned int len = sizeof(buffer);
292  vsnprintf(buffer, len, msg, args);
293  va_end(args);
294  parser.onParseError(string(buffer));
295 
296  END_SAFE_PARSER_BLOCK
297 }
298 
300 // class SaxParserWrapper impl
301 
302 SaxParserWrapper::SaxParserWrapper(SaxParser& parser) :
303  _parser(parser), _handler(), _state(NOT_PARSING), _errorMsg(""), _errorType(0), _errorFile(""), _errorLine(-1)
304 {
305 }
306 
307 SaxParserWrapper::~SaxParserWrapper()
308 {
309  // Really not much to do... everything cleans itself up.
310  _state = NOT_PARSING;
311 
312  // Leak fix. jhrg 6/21/19
313  cleanupParser();
314 }
315 
316 bool SaxParserWrapper::parse(const string& ncmlFilename)
317 {
318  // It's illegal to call this until it's done.
319  if (_state == PARSING) {
320  throw BESInternalError("Parse called again while already in parse.", __FILE__, __LINE__);
321  }
322 
323  // OK, now we're parsing
324  _state = PARSING;
325 
326  setupParser();
327 
328  bool success = xmlSAXUserParseFile(&_handler, this, ncmlFilename.c_str());
329 
330  // If we deferred an exception during the libxml parse call, now's the time to rethrow it.
331  if (isExceptionState()) {
333  }
334 
335  // Otherwise, we're also done parsing.
336  _state = NOT_PARSING;
337  return success;
338 }
339 
341 {
342  _state = EXCEPTION;
343  _errorType = theErr.get_bes_error_type();
344  _errorMsg = theErr.get_message();
345  _errorLine = theErr.get_line();
346  _errorFile = theErr.get_file();
347 }
348 
349 // HACK admittedly a little gross, but it's weird to have to copy an exception
350 // and this seemed the safest way rather than making dynamic storage, etc.
352 {
353  // Clear our state out so we can parse again though.
354  _state = NOT_PARSING;
355 
356  switch (_errorType) {
357  case BES_INTERNAL_ERROR:
358  throw BESInternalError(_errorMsg, _errorFile, _errorLine);
359 
360  case BES_INTERNAL_FATAL_ERROR:
361  throw BESInternalFatalError(_errorMsg, _errorFile, _errorLine);
362 
363  case BES_SYNTAX_USER_ERROR:
364  throw BESSyntaxUserError(_errorMsg, _errorFile, _errorLine);
365 
366  case BES_FORBIDDEN_ERROR:
367  throw BESForbiddenError(_errorMsg, _errorFile, _errorLine);
368 
369  case BES_NOT_FOUND_ERROR:
370  throw BESNotFoundError(_errorMsg, _errorFile, _errorLine);
371 
372  default:
373  throw BESInternalError("Unknown exception type.", __FILE__, __LINE__);
374  }
375 }
376 
378 {
379 #if 0
380  if (_context) {
381  return xmlSAX2GetLineNumber(_context);
382  }
383  else {
384  return -1;
385  }
386 #endif
387  return -1; //FIXME part of leak fix. jhrg 6.21.19
388 }
389 
390 static void setAllHandlerCBToNulls(xmlSAXHandler& h)
391 {
392  h.internalSubset = 0;
393  h.isStandalone = 0;
394  h.hasInternalSubset = 0;
395  h.hasExternalSubset = 0;
396  h.resolveEntity = 0;
397  h.getEntity = 0;
398  h.entityDecl = 0;
399  h.notationDecl = 0;
400  h.attributeDecl = 0;
401  h.elementDecl = 0;
402  h.unparsedEntityDecl = 0;
403  h.setDocumentLocator = 0;
404  h.startDocument = 0;
405  h.endDocument = 0;
406  h.startElement = 0;
407  h.endElement = 0;
408  h.reference = 0;
409  h.characters = 0;
410  h.ignorableWhitespace = 0;
411  h.processingInstruction = 0;
412  h.comment = 0;
413  h.warning = 0;
414  h.error = 0;
415  h.fatalError = 0;
416  h.getParameterEntity = 0;
417  h.cdataBlock = 0;
418  h.externalSubset = 0;
419 
420  // unsigned int initialized; magic number the init should fill in
421  /* The following fields are extensions available only on version 2 */
422  // void *_private; //i'd assume i don't set this either...
423  h.startElementNs = 0;
424  h.endElementNs = 0;
425  h.serror = 0;
426 }
427 
428 void SaxParserWrapper::setupParser()
429 {
430  // setup the handler for version 2,
431  // which sets an internal version magic number
432  // into _handler.initialized
433  // but which doesn't clear the handlers to 0.
434  xmlSAXVersion(&_handler, 2);
435 
436  // Initialize all handlers to 0 by hand to start
437  // so we don't blow those internal magic numbers.
438  setAllHandlerCBToNulls(_handler);
439 
440  // Put our static functions into the handler
441  _handler.startDocument = ncmlStartDocument;
442  _handler.endDocument = ncmlEndDocument;
443  _handler.warning = ncmlWarning;
444  _handler.error = ncmlFatalError;
445  _handler.fatalError = ncmlFatalError;
446  _handler.characters = ncmlCharacters;
447 
448  // We'll use one or the other until we're sure it works.
449 #if NCML_PARSER_USE_SAX2_NAMESPACES
450  _handler.startElement = 0;
451  _handler.endElement = 0;
452  _handler.startElementNs = ncmlSax2StartElementNs;
453  _handler.endElementNs = ncmlSax2EndElementNs;
454 #else
455  _handler.startElement = ncmlStartElement;
456  _handler.endElement = ncmlEndElement;
457  _handler.startElementNs = 0;
458  _handler.endElementNs = 0;
459 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
460 }
461 
462 // Leak fix. jhrg 6/21/19
463 void SaxParserWrapper::cleanupParser() throw ()
464 {
465 }
BESError::get_bes_error_type
virtual int get_bes_error_type()
Return the return code for this error class.
Definition: BESError.h:143
BESError::get_line
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:115
ncml_module::SaxParserWrapper::rethrowException
void rethrowException()
Definition: SaxParserWrapper.cc:351
BESInternalFatalError
exception thrown if an internal error is found and is fatal to the BES
Definition: BESInternalFatalError.h:43
ncml_module::XMLAttribute::fromSAX2NamespaceAttributes
void fromSAX2NamespaceAttributes(const xmlChar **chunkOfFivePointers)
Definition: XMLHelpers.cc:94
ncml_module::XMLNamespaceMap::fromSAX2Namespaces
void fromSAX2Namespaces(const xmlChar **pNamespaces, int numNamespaces)
Definition: XMLHelpers.cc:320
BESNotFoundError
error thrown if the resource requested cannot be found
Definition: BESNotFoundError.h:40
BESError::get_message
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
BESError::get_file
virtual std::string get_file()
get the file name where the exception was thrown
Definition: BESError.h:107
ncml_module::XMLAttributeMap
Definition: XMLHelpers.h:93
ncml_module::SaxParserWrapper::parse
bool parse(const std::string &ncmlFilename)
Do a SAX parse of the ncmlFilename and pass the calls to wrapper parser.
Definition: SaxParserWrapper.cc:316
ncml_module::SaxParserWrapper::isExceptionState
bool isExceptionState() const
Definition: SaxParserWrapper.h:150
BESSyntaxUserError
error thrown if there is a user syntax error in the request or any other user error
Definition: BESSyntaxUserError.h:41
ncml_module::XMLAttributeMap::addAttribute
void addAttribute(const XMLAttribute &attribute)
Definition: XMLHelpers.cc:167
BESForbiddenError
error thrown if the BES is not allowed to access the resource requested
Definition: BESForbiddenError.h:40
BESInternalError
exception thrown if internal error encountered
Definition: BESInternalError.h:43
ncml_module::SaxParserWrapper::getCurrentParseLine
int getCurrentParseLine() const
Definition: SaxParserWrapper.cc:377
ncml_module
NcML Parser for adding/modifying/removing metadata (attributes) to existing local datasets using NcML...
Definition: AggregationElement.cc:72
ncml_module::XMLAttribute
Definition: XMLHelpers.h:58
ncml_module::XMLUtil
Definition: XMLHelpers.h:52
ncml_module::XMLNamespaceMap
Definition: XMLHelpers.h:150
ncml_module::SaxParser
Interface class for the wrapper between libxml C SAX parser and our NCMLParser.
Definition: SaxParser.h:48
BESError
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
ncml_module::XMLAttributeMap::clear
void clear()
Definition: XMLHelpers.cc:161
ncml_module::SaxParserWrapper::deferException
void deferException(BESError &theErr)
The remaining calls are for the internals of the parser, but need to be public.
Definition: SaxParserWrapper.cc:340