bes  Updated for version 3.20.6
DmrppParserSax2.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2012 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 #include <iostream>
28 #include <sstream>
29 
30 #include <cstring>
31 #include <cstdarg>
32 #include <cassert>
33 
34 #include <libxml/parserInternals.h>
35 
36 #include <DMR.h>
37 
38 #include <BaseType.h>
39 #include <Array.h>
40 #include <D4Group.h>
41 #include <D4Attributes.h>
42 #include <D4Maps.h>
43 #include <D4Enum.h>
44 #include <D4BaseTypeFactory.h>
45 
46 #include <DapXmlNamespaces.h>
47 #include <util.h>
48 
49 #include <BESInternalError.h>
50 #include <BESDebug.h>
51 #include <BESCatalog.h>
52 #include <BESCatalogUtils.h>
53 #include <BESCatalogList.h>
54 #include <BESUtil.h>
55 
56 #include "DmrppParserSax2.h"
57 #include "DmrppCommon.h"
58 
// Size constants: 512 * 1024 and 1024 * 1024.
// The definitions carry no trailing semicolon so that the macros expand to a
// plain integer literal and can be used inside expressions (comparisons,
// array bounds, function arguments), not only as a complete statement.
#define FIVE_12K 524288
#define ONE_MB 1048576
#define MAX_INPUT_LINE_LENGTH ONE_MB
62 
63 
// Context name handed to BESDEBUG by the code in this file.
// std:: is spelled out because these definitions precede the file's
// 'using namespace std;' directive.
static const std::string module = "dmrpp:2";
// Namespace URI compared against an element's namespace to recognize DMR++ elements.
static const std::string dmrpp_namespace = "http://xml.opendap.org/dap/dmrpp/1.0.0#";
66 
67 using namespace libdap;
68 using namespace std;
69 
70 namespace dmrpp {
71 
// Printable names of the parser states, used for debugging output. The table
// is indexed with the value returned by get_state(), so the order of the
// entries must not change.
static const char *states[] = {
    "parser_start",
    "inside_dataset",
    "inside_group",                 // the state just after parsing the start of a Group element
    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",
    "inside_enum_def",
    "inside_enum_const",
    "inside_dim_def",
    "inside_simple_type",           // covers Byte, ..., Url, Opaque
    // (there is no "inside_array" entry)
    "inside_dim",
    "inside_map",
    "inside_constructor",
    "not_dap4_element",
    "inside_dmrpp_object",
    "inside_dmrpp_chunkDimensionSizes_element",
    "parser_unknown",
    "parser_error",
    "parser_fatal_error",
    "parser_end"
};
99 
/**
 * Test whether an element name differs from an expected tag.
 * @param name The element name to test (NUL-terminated)
 * @param tag The tag to compare against (NUL-terminated)
 * @return true when the two strings are different, false when they are equal
 */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
104 
113 D4EnumDef *
114 DmrppParserSax2::enum_def()
115 {
116  if (!d_enum_def) d_enum_def = new D4EnumDef;
117 
118  return d_enum_def;
119 }
120 
127 D4Dimension *
128 DmrppParserSax2::dim_def()
129 {
130  if (!d_dim_def) d_dim_def = new D4Dimension;
131 
132  return d_dim_def;
133 }
134 
135 /* Search through the attribute array for a given attribute name.
136  * If the name is found, return the string value for that attribute
137  * @param name: Search for this name
138  * @param attributes: Array that holds the attribute values to search
139  * @param num_attributes: Number of attributes
140  * @return string value of attribute; the empty string if the name was not found
141  */
142 string DmrppParserSax2::get_attribute_val(const string &name, const xmlChar **attributes, int num_attributes)
143 {
144  unsigned int index = 0;
145  for (int i = 0; i < num_attributes; ++i, index += 5) {
146  if (strncmp(name.c_str(), (const char *)attributes[index], name.length()) == 0) {
147  return string((const char *)attributes[index+3], (const char *)attributes[index+4]);
148  }
149  }
150  return "";
151 }
152 
#if 0
// Disabled code: everything up to the matching #endif is compiled out.

// Rebuild xml_attrs from the attribute array of the current element. Each
// attribute occupies five consecutive entries; entry 0 is used as the key and
// the remaining four are handed to the XMLAttribute constructor.
void DmrppParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
{
    xml_attrs.clear(); // erase old attributes

    // Make a value using the attribute name and the prefix, namespace URI
    // and the value. The prefix might be null.
    for (int i = 0; i < nb_attributes; ++i) {
        const xmlChar **attr = attributes + 5 * i;
        const char *attr_name = (const char *) attr[0];

        xml_attrs.insert(map<string, XMLAttribute>::value_type(string(attr_name), XMLAttribute(attr + 1)));

        BESDEBUG(module, "XML Attribute '" << attr_name << "': " << xml_attrs[attr_name].value << endl);
    }
}
#endif
176 
183 void DmrppParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
184 {
185  // make a value with the prefix and namespace URI. The prefix might be null.
186  for (int i = 0; i < nb_namespaces; ++i) {
187  namespace_table.insert(
188  map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *) namespaces[i * 2] : "",
189  (const char *) namespaces[i * 2 + 1]));
190  }
191 }
192 
#if 0
// Disabled code: everything up to the matching #endif is compiled out.

// Look up 'attr' in xml_attrs; report an error through dmr_error() and
// return false when it is absent, return true otherwise.
bool DmrppParserSax2::check_required_attribute(const string & attr)
{
    const bool found = (xml_attrs.find(attr) != xml_attrs.end());
    if (!found) dmr_error(this, "Required attribute '%s' not found.", attr.c_str());

    return found;
}
#endif
210 
211 /*
212  * An improved version of the previous check_required_attribute.
213  * Searches for an attribute name within the attribute array.
214  * @param name: The attribute name to search for
215  * @param attributes: The attribute array
216  * @param num_attributes: The number of attributes
217  * @return success: true
218  * failure: false
219  */
220 bool DmrppParserSax2::check_required_attribute(const string &name, const xmlChar **attributes, int num_attributes)
221 {
222  unsigned int index = 0;
223  for (int i = 0; i < num_attributes; ++i, index += 5) {
224  if (strncmp(name.c_str(), (const char *)attributes[index], name.length()) == 0) {
225  return true;
226  }
227  }
228 
229  dmr_error(this, "Required attribute '%s' not found.", name.c_str());
230  return false;
231 }
232 
#if 0
// Disabled code: everything up to the matching #endif is compiled out.

// Return true when 'attr' is a key of xml_attrs, false otherwise.
bool DmrppParserSax2::check_attribute(const string & attr)
{
    const bool missing = (xml_attrs.find(attr) == xml_attrs.end());
    return !missing;
}
#endif
245 
256 bool DmrppParserSax2::check_attribute(const string &name, const xmlChar **attributes, int num_attributes)
257 {
258  unsigned int index = 0;
259  for (int i = 0; i < num_attributes; ++i, index += 5) {
260  if (strncmp(name.c_str(), (const char *)attributes[index], name.length()) == 0) {
261  return true;
262  }
263  }
264  return false;
265 }
266 
267 bool DmrppParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
268 {
269  if (is_not(name, "Dimension")) return false;
270 
271 #if 0
272  transfer_xml_attrs(attrs, nb_attributes);
273 #endif
274 
275  if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("size", attrs, nb_attributes))) {
276  dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
277  return false;
278  }
279 
280  // This getter (dim_def) allocates a new object if needed.
281  dim_def()->set_name(get_attribute_val("name", attrs, nb_attributes));
282  try {
283  dim_def()->set_size(get_attribute_val("size", attrs, nb_attributes));
284  }
285  catch (Error &e) {
286  dmr_error(this, e.get_error_message().c_str());
287  return false;
288  }
289 
290  return true;
291 }
292 
310 bool DmrppParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
311 {
312  if (is_not(name, "Dim")) return false;
313 
314 #if 0
315  transfer_xml_attrs(attrs, nb_attributes);
316 #endif
317 
318  if (check_attribute("size", attrs, nb_attributes) && check_attribute("name", attrs, nb_attributes)) {
319  dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
320  return false;
321  }
322  if (!(check_attribute("size", attrs, nb_attributes) || check_attribute("name", attrs, nb_attributes))) {
323  dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
324  return false;
325  }
326 
327  if (!top_basetype()->is_vector_type()) {
328  // Make the top BaseType* an array
329  BaseType *b = top_basetype();
330  pop_basetype();
331 
332  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
333  a->set_is_dap4(true);
334  a->add_var_nocopy(b);
335  a->set_attributes_nocopy(b->attributes());
336  // trick: instead of popping b's attributes, copying them and then pushing
337  // a's copy, just move the pointer (but make sure there's only one object that
338  // references that pointer).
339  b->set_attributes_nocopy(0);
340 
341  push_basetype(a);
342  }
343 
344  assert(top_basetype()->is_vector_type());
345 
346  Array *a = static_cast<Array*>(top_basetype());
347  if (check_attribute("size", attrs, nb_attributes)) {
348  a->append_dim(stoi(get_attribute_val("size", attrs, nb_attributes))); // low budget code for now. jhrg 8/20/13, modified to use new function. kln 9/7/19
349  return true;
350  }
351  else if (check_attribute("name", attrs, nb_attributes)) {
352  string name = get_attribute_val("name", attrs, nb_attributes);
353 
354  D4Dimension *dim = 0;
355  if (name[0] == '/') // lookup the Dimension in the root group
356  dim = dmr()->root()->find_dim(name);
357  else
358  // get enclosing Group and lookup Dimension there
359  dim = top_group()->find_dim(name);
360 
361  if (!dim)
362  throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.");
363  a->append_dim(dim);
364  return true;
365  }
366 
367  return false;
368 }
369 
370 bool DmrppParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
371 {
372  if (is_not(name, "Map")) return false;
373 
374 #if 0
375  transfer_xml_attrs(attrs, nb_attributes);
376 #endif
377 
378  if (!check_attribute("name", attrs, nb_attributes)) {
379  dmr_error(this, "The 'name' attribute must be used in a Map element.");
380  return false;
381  }
382 
383  if (!top_basetype()->is_vector_type()) {
384  // Make the top BaseType* an array
385  BaseType *b = top_basetype();
386  pop_basetype();
387 
388  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
389  a->set_is_dap4(true);
390  a->add_var_nocopy(b);
391  a->set_attributes_nocopy(b->attributes());
392  // trick: instead of popping b's attributes, copying them and then pushing
393  // a's copy, just move the pointer (but make sure there's only one object that
394  // references that pointer).
395  b->set_attributes_nocopy(0);
396 
397  push_basetype(a);
398  }
399 
400  assert(top_basetype()->is_vector_type());
401 
402  Array *a = static_cast<Array*>(top_basetype());
403 
404  string map_name = get_attribute_val("name", attrs, nb_attributes);
405  if (get_attribute_val("name", attrs, nb_attributes).at(0) != '/') map_name = top_group()->FQN() + map_name;
406 
407  Array *map_source = 0; // The array variable that holds the data for the Map
408 
409  if (map_name[0] == '/') // lookup the Map in the root group
410  map_source = dmr()->root()->find_map_source(map_name);
411  else
412  // get enclosing Group and lookup Map there
413  map_source = top_group()->find_map_source(map_name);
414 
415  // Change: If the parser is in 'strict' mode (the default) and the Array named by
416  // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
417  // mode), then this is not an error. However, the Array referenced by the Map will
418  // be null. This is a change in the parser's behavior to accommodate requests for
419  // Arrays that include Maps that do not also include the Map(s) in the request.
420  // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
421  if (!map_source && d_strict)
422  throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.");
423 
424  a->maps()->add_map(new D4Map(map_name, map_source));
425 
426  return true;
427 }
428 
429 bool DmrppParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
430 {
431  if (is_not(name, "Group")) return false;
432 
433 #if 0
434  transfer_xml_attrs(attrs, nb_attributes);
435 #endif
436 
437  if (!check_required_attribute("name", attrs, nb_attributes)) {
438  dmr_error(this, "The required attribute 'name' was missing from a Group element.");
439  return false;
440  }
441 
442  BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, get_attribute_val("name", attrs, nb_attributes));
443  if (!btp) {
444  dmr_fatal_error(this, "Could not instantiate the Group '%s'.", get_attribute_val("name", attrs, nb_attributes).c_str());
445  return false;
446  }
447 
448  D4Group *grp = static_cast<D4Group*>(btp);
449 
450  // Need to set this to get the D4Attribute behavior in the type classes
451  // shared between DAP2 and DAP4. jhrg 4/18/13
452  grp->set_is_dap4(true);
453 
454  // link it up and change the current group
455  D4Group *parent = top_group();
456  if (!parent) {
457  dmr_fatal_error(this, "No Group on the Group stack.");
458  return false;
459  }
460 
461  grp->set_parent(parent);
462  parent->add_group_nocopy(grp);
463 
464  push_group(grp);
465  push_attributes(grp->attributes());
466  return true;
467 }
468 
475 inline bool DmrppParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
476 {
477  if (is_not(name, "Attribute")) return false;
478 
479 #if 0
480  // These methods set the state to parser_error if a problem is found.
481  transfer_xml_attrs(attrs, nb_attributes);
482 #endif
483 
484  // add error
485  if (!(check_required_attribute(string("name"), attrs, nb_attributes) && check_required_attribute(string("type"), attrs, nb_attributes))) {
486  dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
487  return false;
488  }
489 
490  if (get_attribute_val("type", attrs, nb_attributes) == "Container") {
491  push_state(inside_attribute_container);
492 
493  BESDEBUG(module, "Pushing attribute container " << get_attribute_val("name", attrs, nb_attributes) << endl);
494  D4Attribute *child = new D4Attribute(get_attribute_val("name", attrs, nb_attributes), attr_container_c);
495 
496  D4Attributes *tos = top_attributes();
497  // add return
498  if (!tos) {
499  delete child;
500  dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
501  return false;
502  }
503 
504  tos->add_attribute_nocopy(child);
505  push_attributes(child->attributes());
506  }
507  else if (get_attribute_val("type", attrs, nb_attributes) == "OtherXML") {
508  push_state(inside_other_xml_attribute);
509 
510  dods_attr_name = get_attribute_val("name", attrs, nb_attributes);
511  dods_attr_type = get_attribute_val("type", attrs, nb_attributes);
512  }
513  else {
514  push_state(inside_attribute);
515 
516  dods_attr_name = get_attribute_val("name", attrs, nb_attributes);
517  dods_attr_type = get_attribute_val("type", attrs, nb_attributes);
518  }
519 
520  return true;
521 }
522 
528 inline bool DmrppParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
529 {
530  if (is_not(name, "Enumeration")) return false;
531 
532 #if 0
533  transfer_xml_attrs(attrs, nb_attributes);
534 #endif
535 
536  if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("basetype", attrs, nb_attributes))) {
537  dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
538  return false;
539  }
540 
541  Type t = get_type(get_attribute_val("basetype", attrs, nb_attributes).c_str());
542  if (!is_integer_type(t)) {
543  dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
544  get_attribute_val("name", attrs, nb_attributes).c_str(), get_attribute_val("basetype", attrs, nb_attributes).c_str());
545  return false;
546  }
547 
548  // This getter allocates a new object if needed.
549  string enum_def_path = get_attribute_val("name", attrs, nb_attributes);
550 #if 0
551  // Use FQNs when things are referenced, not when they are defined
552  if (xml_attrs["name"].value[0] != '/')
553  enum_def_path = top_group()->FQN() + enum_def_path;
554 #endif
555  enum_def()->set_name(enum_def_path);
556  enum_def()->set_type(t);
557 
558  return true;
559 }
560 
561 inline bool DmrppParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
562 {
563  if (is_not(name, "EnumConst")) return false;
564 
565 #if 0
566  // These methods set the state to parser_error if a problem is found.
567  transfer_xml_attrs(attrs, nb_attributes);
568 #endif
569 
570  if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("value", attrs, nb_attributes))) {
571  dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
572  return false;
573  }
574 
575  istringstream iss(get_attribute_val("value", attrs, nb_attributes));
576  long long value = 0;
577  iss >> skipws >> value;
578  if (iss.fail() || iss.bad()) {
579  dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
580  get_attribute_val("value", attrs, nb_attributes).c_str());
581  }
582  else if (!enum_def()->is_valid_enum_value(value)) {
583  dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
584  get_attribute_val("value", attrs, nb_attributes).c_str(), D4type_name(d_enum_def->type()).c_str());
585  }
586  else {
587  // unfortunate choice of names... args are 'label' and 'value'
588  enum_def()->add_value(get_attribute_val("name", attrs, nb_attributes), value);
589  }
590 
591  return true;
592 }
593 
599 inline bool DmrppParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
600 {
601  Type t = get_type(name);
602  if (is_simple_type(t)) {
603  process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
604  return true;
605  }
606  else {
607  switch (t) {
608  case dods_structure_c:
609  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
610  return true;
611 
612  case dods_sequence_c:
613  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
614  return true;
615 
616  default:
617  return false;
618  }
619  }
620 }
621 
629 void DmrppParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
630 {
631 #if 0
632  transfer_xml_attrs(attrs, nb_attributes);
633 #endif
634 
635  if (check_required_attribute("name", attrs, nb_attributes)) {
636  BaseType *btp = dmr()->factory()->NewVariable(t, get_attribute_val("name", attrs, nb_attributes));
637  if (!btp) {
638  dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
639  return;
640  }
641 
642  if ((t == dods_enum_c) && check_required_attribute("enum", attrs, nb_attributes)) {
643  D4EnumDef *enum_def = 0;
644  string enum_path = get_attribute_val("enum", attrs, nb_attributes);
645  if (enum_path[0] == '/')
646  enum_def = dmr()->root()->find_enum_def(enum_path);
647  else
648  enum_def = top_group()->find_enum_def(enum_path);
649 
650  if (!enum_def) dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
651 
652  static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
653  }
654 
655  btp->set_is_dap4(true); // see comment above
656  push_basetype(btp);
657 
658  push_attributes(btp->attributes());
659 
660  push_state(s);
661  }
662 }
663 
674 void DmrppParserSax2::dmr_start_document(void * p)
675 {
676  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
677  parser->error_msg = "";
678  parser->char_data = "";
679 
680  // Set this in intern_helper so that the loop test for the parser_end
681  // state works for the first iteration. It seems like XMLParseChunk calls this
682  // function on it's first run. jhrg 9/16/13
683  // parser->push_state(parser_start);
684 
685  parser->push_attributes(parser->dmr()->root()->attributes());
686 
687  if (parser->debug()) cerr << "Parser start state: " << states[parser->get_state()] << endl;
688 }
689 
692 void DmrppParserSax2::dmr_end_document(void * p)
693 {
694  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
695 
696  if (parser->debug()) cerr << "Parser end state: " << states[parser->get_state()] << endl;
697 
698  if (parser->get_state() != parser_end)
699  DmrppParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
700 
701  // If we've found any sort of error, don't make the DMR; intern() will
702  // take care of the error.
703  if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error) return;
704 
705  if (!parser->empty_basetype() || parser->empty_group())
706  DmrppParserSax2::dmr_error(parser,
707  "The document did not contain a valid root Group or contained unbalanced tags.");
708 
709  if (parser->debug()) parser->top_group()->dump(cerr);
710 
711  parser->pop_group(); // leave the stack 'clean'
712  parser->pop_attributes();
713 }
714 
715 void DmrppParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
716  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/, const xmlChar **attributes)
717 {
718  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
719  const char *localname = reinterpret_cast<const char *>(l);
720 
721  string this_element_ns_name(URI ? (char *) URI : "null");
722 
723  if (parser->get_state() != parser_error) {
724  string dap4_ns_name = DapXmlNamspaces::getDapNamespaceString(DAP_4_0);
725  if (parser->debug()) cerr << "dap4_ns_name: " << dap4_ns_name << endl;
726 
727  if (this_element_ns_name == dmrpp_namespace) {
728  if (strcmp(localname, "chunkDimensionSizes") == 0) {
729  if (parser->debug()) cerr << "Found dmrpp:chunkDimensionSizes element. Pushing state." << endl;
730  parser->push_state(inside_dmrpp_chunkDimensionSizes_element);
731  }
732  else {
733  if (parser->debug())
734  cerr << "Start of element in dmrpp namespace: " << localname << " detected." << endl;
735  parser->push_state(inside_dmrpp_object);
736  // Ingest the dmrpp namespaced element text content
737  }
738 
739  }
740  else if (this_element_ns_name != dap4_ns_name) {
741  if (parser->debug()) cerr << "Start of non DAP4 element: " << localname << " detected." << endl;
742  parser->push_state(not_dap4_element);
743  }
744  }
745 
746  if (parser->debug())
747  cerr << "Start element " << localname << " prefix: " << (prefix ? (char *) prefix : "null") << " ns: "
748  << this_element_ns_name << " (state: " << states[parser->get_state()] << ")" << endl;
749 
750  switch (parser->get_state()) {
751  case parser_start:
752  if (is_not(localname, "Dataset"))
753  DmrppParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.",
754  localname);
755 
756  parser->root_ns = URI ? (const char *) URI : "";
757 
758 #if 0
759  parser->transfer_xml_attrs(attributes, nb_attributes);
760 #endif
761 
762  if (parser->check_required_attribute(string("name"), attributes, nb_attributes)) parser->dmr()->set_name(parser->get_attribute_val("name", attributes, nb_attributes));
763 
764  if (parser->check_attribute("dapVersion", attributes, nb_attributes))
765  parser->dmr()->set_dap_version(parser->get_attribute_val("dapVersion", attributes, nb_attributes));
766 
767  if (parser->check_attribute("dmrVersion", attributes, nb_attributes))
768  parser->dmr()->set_dmr_version(parser->get_attribute_val("dmrVersion", attributes, nb_attributes));
769 
770  if (parser->check_attribute("base", attributes, nb_attributes)) {
771  parser->dmr()->set_request_xml_base(parser->get_attribute_val("base", attributes, nb_attributes));
772  }
773  if (parser->debug()) cerr << "Dataset xml:base is set to '" << parser->dmr()->request_xml_base() << "'" << endl;
774 
775  if (parser->check_attribute("href", attributes, nb_attributes)) {
776  parser->dmrpp_dataset_href = parser->get_attribute_val("href", attributes, nb_attributes);
777  }
778  if (parser->debug()) cerr << "Dataset dmrpp:href is set to '" << parser->dmrpp_dataset_href << "'" << endl;
779 
780  if (!parser->root_ns.empty()) parser->dmr()->set_namespace(parser->root_ns);
781 
782  // Push the root Group on the stack
783  parser->push_group(parser->dmr()->root());
784 
785  parser->push_state(inside_dataset);
786 
787  break;
788 
789  // Both inside dataset and inside group can have the same stuff.
790  // The difference is that the Dataset holds the root group, which
791  // must be present; other groups are optional
792  case inside_dataset:
793  case inside_group:
794  if (parser->process_enum_def(localname, attributes, nb_attributes))
795  parser->push_state(inside_enum_def);
796  else if (parser->process_dimension_def(localname, attributes, nb_attributes))
797  parser->push_state(inside_dim_def);
798  else if (parser->process_group(localname, attributes, nb_attributes))
799  parser->push_state(inside_group);
800  else if (parser->process_variable(localname, attributes, nb_attributes))
801  // This will push either inside_simple_type or inside_structure
802  // onto the parser state stack.
803  break;
804  else if (parser->process_attribute(localname, attributes, nb_attributes))
805  // This will push either inside_attribute, inside_attribute_container
806  // or inside_otherxml_attribute onto the parser state stack
807  break;
808  else
809  DmrppParserSax2::dmr_error(parser,
810  "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.",
811  localname);
812  break;
813 
814  case inside_attribute_container:
815  if (parser->process_attribute(localname, attributes, nb_attributes))
816  break;
817  else
818  DmrppParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
819  break;
820 
821  case inside_attribute:
822  if (parser->process_attribute(localname, attributes, nb_attributes))
823  break;
824  else if (strcmp(localname, "Value") == 0)
825  parser->push_state(inside_attribute_value);
826  else
827  dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
828  break;
829 
830  case inside_attribute_value:
831  // Attribute values are processed by the end element code.
832  break;
833 
834  case inside_other_xml_attribute:
835  parser->other_xml_depth++;
836 
837  // Accumulate the elements here
838  parser->other_xml.append("<");
839  if (prefix) {
840  parser->other_xml.append((const char *) prefix);
841  parser->other_xml.append(":");
842  }
843  parser->other_xml.append(localname);
844 
845  if (nb_namespaces != 0) {
846  parser->transfer_xml_ns(namespaces, nb_namespaces);
847 
848  for (map<string, string>::iterator i = parser->namespace_table.begin(); i != parser->namespace_table.end();
849  ++i) {
850  parser->other_xml.append(" xmlns");
851  if (!i->first.empty()) {
852  parser->other_xml.append(":");
853  parser->other_xml.append(i->first);
854  }
855  parser->other_xml.append("=\"");
856  parser->other_xml.append(i->second);
857  parser->other_xml.append("\"");
858  }
859  }
860 
861  if (nb_attributes != 0) {
862 #if 0
863  parser->transfer_xml_attrs(attributes, nb_attributes);
864 #endif
865  for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
866  parser->other_xml.append(" ");
867  if (!i->second.prefix.empty()) {
868  parser->other_xml.append(i->second.prefix);
869  parser->other_xml.append(":");
870  }
871  parser->other_xml.append(i->first);
872  parser->other_xml.append("=\"");
873  parser->other_xml.append(i->second.value);
874  parser->other_xml.append("\"");
875  }
876  }
877 
878  parser->other_xml.append(">");
879  break;
880 
881  case inside_enum_def:
882  // process an EnumConst element
883  if (parser->process_enum_const(localname, attributes, nb_attributes))
884  parser->push_state(inside_enum_const);
885  else
886  dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
887  break;
888 
889  case inside_enum_const:
890  // No content; nothing to do
891  break;
892 
893  case inside_dim_def:
894  // No content; nothing to do
895  break;
896 
897  case inside_dim:
898  // No content.
899  break;
900 
901  case inside_map:
902  // No content.
903  break;
904 
905  case inside_simple_type:
906  if (parser->process_attribute(localname, attributes, nb_attributes))
907  break;
908  else if (parser->process_dimension(localname, attributes, nb_attributes))
909  parser->push_state(inside_dim);
910  else if (parser->process_map(localname, attributes, nb_attributes))
911  parser->push_state(inside_map);
912  else
913  dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
914  break;
915 
916  case inside_constructor:
917  if (parser->process_variable(localname, attributes, nb_attributes))
918  // This will push either inside_simple_type or inside_structure
919  // onto the parser state stack.
920  break;
921  else if (parser->process_attribute(localname, attributes, nb_attributes))
922  break;
923  else if (parser->process_dimension(localname, attributes, nb_attributes))
924  parser->push_state(inside_dim);
925  else if (parser->process_map(localname, attributes, nb_attributes))
926  parser->push_state(inside_map);
927  else
928  DmrppParserSax2::dmr_error(parser,
929  "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
930  break;
931 
932  case not_dap4_element:
933  if (parser->debug())
934  cerr << "SKIPPING unexpected element. localname: " << localname << "namespace: " << this_element_ns_name
935  << endl;
936  break;
937 
938  case inside_dmrpp_object: {
939  if (parser->debug()) cerr << "Inside dmrpp namespaced element. localname: " << localname << endl;
940  assert(this_element_ns_name == dmrpp_namespace);
941 
942 #if 0
943  parser->transfer_xml_attrs(attributes, nb_attributes); // load up xml_attrs
944 #endif
945 
946  BaseType *bt = parser->top_basetype();
947  if (!bt) throw BESInternalError("Could locate parent BaseType during parse operation.", __FILE__, __LINE__);
948 
949  DmrppCommon *dc = dynamic_cast<DmrppCommon*>(bt); // Get the Dmrpp common info
950  if (!dc)
951  throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
952 
953  // Ingest the dmrpp:chunks element and it attributes
954  if (strcmp(localname, "chunks") == 0) {
955  if (parser->debug()) cerr << "DMR++ chunks element. localname: " << localname << endl;
956 
957  if (parser->check_attribute("compressionType", attributes, nb_attributes)) {
958  string compression_type_string(parser->get_attribute_val("compressionType", attributes, nb_attributes));
959  dc->ingest_compression_type(compression_type_string);
960 
961  if (parser->debug())
962  cerr << "Processed attribute 'compressionType=\"" << compression_type_string << "\"'" << endl;
963  }
964  else {
965  if (parser->debug())
966  cerr << "There was no 'compressionType' attribute associated with the variable '" << bt->type_name()
967  << " " << bt->name() << "'" << endl;
968  }
969  }
970  // Ingest an dmrpp:chunk element and its attributes
971  else if (strcmp(localname, "chunk") == 0) {
972  string data_url = "unknown_data_location";
973  if (parser->check_attribute("href", attributes, nb_attributes)) {
974 #if 0
975  istringstream data_url_ss(parser->xml_attrs["href"].value);
976  data_url = data_url_ss.str();
977  if (parser->debug())
978  cerr << "Processing 'href' value into data_url. href: " << data_url_ss.str() << endl;
979 #endif
980 
981  data_url = parser->get_attribute_val("href", attributes, nb_attributes);
982  if (parser->debug())
983  cerr << "Processing 'href' value into data_url. href: " << data_url << endl;
984  }
985  else {
986  if (parser->debug()) cerr << "No attribute 'href' located. Trying Dataset/@dmrpp:href..." << endl;
987  // This bit of magic sets the URL used to get the data and it's
988  // magic in part because it may be a file or an http URL
989  data_url = parser->dmrpp_dataset_href;
990  if (parser->debug())
991  cerr << "Processing dmrpp:href into data_url. dmrpp:href='" << data_url << "'" << endl;
992  }
993  // First we see if it's an HTTP URL, and if not we
994  // make a local file url based on the Catalog Root
995 #if 0
996  std::string http("http://");
997  std::string https("https://");
998  std::string file("file://");
999  if (data_url.compare(0, http.size(), http) && data_url.compare(0, https.size(), https)
1000  && data_url.compare(0, file.size(), file)) {
1001 #endif
1002 
1003  if (data_url.find("http://") != 0 && data_url.find("https://") != 0 && data_url.find("file://") != 0) {
1004  if (parser->debug()) cerr << "data_url does NOT start with 'http://', 'https://' or 'file://'. "
1005  "Retrieving default catalog root directory" << endl;
1006 
1007  // Now we try to find the default catalog. If we can't find it we punt and leave it be.
1009  if (!defcat) {
1010  if (parser->debug()) cerr << "Not able to find the default catalog." << endl;
1011  }
1012  else {
1013  // Found the catalog so we get the root dir; make a file URL.
1015 
1016  if (parser->debug())
1017  cerr << "Found default catalog root_dir: '" << utils->get_root_dir() << "'" << endl;
1018 
1019  data_url = BESUtil::assemblePath(utils->get_root_dir(), data_url, true);
1020  data_url = "file://" + data_url;
1021  }
1022  }
1023 
1024  if (parser->debug()) cerr << "Processed data_url: '" << data_url << "'" << endl;
1025 
1026  unsigned long long offset = 0;
1027  unsigned long long size = 0;
1028  string chunk_position_in_array("");
1029 
1030  if (parser->check_required_attribute("offset", attributes, nb_attributes)) {
1031  istringstream offset_ss(parser->get_attribute_val("offset", attributes, nb_attributes));
1032  offset_ss >> offset;
1033  if (parser->debug()) cerr << "Processed attribute 'offset=\"" << offset << "\"'" << endl;
1034  }
1035  else {
1036  dmr_error(parser, "The hdf:byteStream element is missing the required attribute 'offset'.");
1037  }
1038 
1039  if (parser->check_required_attribute("nBytes", attributes, nb_attributes)) {
1040  istringstream size_ss(parser->get_attribute_val("nBytes", attributes, nb_attributes));
1041  size_ss >> size;
1042  if (parser->debug()) cerr << "Processed attribute 'nBytes=\"" << size << "\"'" << endl;
1043  }
1044  else {
1045  dmr_error(parser, "The hdf:byteStream element is missing the required attribute 'size'.");
1046  }
1047 
1048  if (parser->check_attribute("chunkPositionInArray", attributes, nb_attributes)) {
1049  istringstream chunk_position_ss(parser->get_attribute_val("chunkPositionInArray", attributes, nb_attributes));
1050  chunk_position_in_array = chunk_position_ss.str();
1051  if (parser->debug())
1052  cerr << "Found attribute 'chunkPositionInArray' value: " << chunk_position_ss.str() << endl;
1053  }
1054  else {
1055  if (parser->debug()) cerr << "No attribute 'chunkPositionInArray' located" << endl;
1056  }
1057 
1058  dc->add_chunk(data_url, size, offset, chunk_position_in_array);
1059  }
1060  }
1061  break;
1062 
1063  case inside_dmrpp_chunkDimensionSizes_element:
1064  // The dmrpp:chunkDimensionSizes value is processed by the end element code.
1065  break;
1066 
1067  case parser_unknown:
1068  case parser_error:
1069  case parser_fatal_error:
1070  break;
1071 
1072  case parser_end:
1073  // FIXME Error?
1074  break;
1075  }
1076 
1077  if (parser->debug()) cerr << "Start element exit state: " << states[parser->get_state()] << endl;
1078 }
1079 
1080 void DmrppParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
1081 {
1082  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1083  const char *localname = (const char *) l;
1084 
1085  if (parser->debug())
1086  cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
1087 
1088  switch (parser->get_state()) {
1089  case parser_start:
1090  dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
1091  break;
1092 
1093  case inside_dataset:
1094  if (is_not(localname, "Dataset"))
1095  DmrppParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
1096 
1097  parser->pop_state();
1098  if (parser->get_state() != parser_start)
1099  dmr_fatal_error(parser, "Unexpected state, expected start state.");
1100  else {
1101  parser->pop_state();
1102  parser->push_state(parser_end);
1103  }
1104  break;
1105 
1106  case inside_group: {
1107  if (is_not(localname, "Group"))
1108  DmrppParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
1109 
1110  if (!parser->empty_basetype() || parser->empty_group())
1111  DmrppParserSax2::dmr_error(parser,
1112  "The document did not contain a valid root Group or contained unbalanced tags.");
1113 
1114  parser->pop_group();
1115  parser->pop_state();
1116  break;
1117  }
1118 
1119  case inside_attribute_container:
1120  if (is_not(localname, "Attribute"))
1121  DmrppParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
1122 
1123  parser->pop_state();
1124  parser->pop_attributes();
1125  break;
1126 
1127  case inside_attribute:
1128  if (is_not(localname, "Attribute"))
1129  DmrppParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
1130 
1131  parser->pop_state();
1132  break;
1133 
1134  case inside_attribute_value: {
1135  if (is_not(localname, "Value"))
1136  DmrppParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
1137 
1138  parser->pop_state();
1139 
1140  // The old code added more values using the name and type as
1141  // indexes to find the correct attribute. Use get() for that
1142  // now. Or fix this code to keep a pointer to the to attribute...
1143  D4Attributes *attrs = parser->top_attributes();
1144  D4Attribute *attr = attrs->get(parser->dods_attr_name);
1145  if (!attr) {
1146  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1147  attrs->add_attribute_nocopy(attr);
1148  }
1149  attr->add_value(parser->char_data);
1150 
1151  parser->char_data = ""; // Null this after use.
1152  break;
1153  }
1154 
1155  case inside_other_xml_attribute: {
1156  if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
1157  parser->pop_state();
1158 
1159  // The old code added more values using the name and type as
1160  // indexes to find the correct attribute. Use get() for that
1161  // now. Or fix this code to keep a pointer to the to attribute...
1162  D4Attributes *attrs = parser->top_attributes();
1163  D4Attribute *attr = attrs->get(parser->dods_attr_name);
1164  if (!attr) {
1165  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1166  attrs->add_attribute_nocopy(attr);
1167  }
1168  attr->add_value(parser->other_xml);
1169 
1170  parser->other_xml = ""; // Null this after use.
1171  }
1172  else {
1173  if (parser->other_xml_depth == 0) {
1174  DmrppParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
1175  localname);
1176  break;
1177  }
1178  parser->other_xml_depth--;
1179 
1180  parser->other_xml.append("</");
1181  if (prefix) {
1182  parser->other_xml.append((const char *) prefix);
1183  parser->other_xml.append(":");
1184  }
1185  parser->other_xml.append(localname);
1186  parser->other_xml.append(">");
1187  }
1188  break;
1189  }
1190 
1191  case inside_enum_def:
1192  if (is_not(localname, "Enumeration"))
1193  DmrppParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
1194  if (!parser->top_group())
1195  DmrppParserSax2::dmr_fatal_error(parser,
1196  "Expected a Group to be the current item, while finishing up an Enumeration.");
1197  else {
1198  // copy the pointer; not a deep copy
1199  parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
1200  // Set the enum_def to null; next call to enum_def() will
1201  // allocate a new object
1202  parser->clear_enum_def();
1203  parser->pop_state();
1204  }
1205  break;
1206 
1207  case inside_enum_const:
1208  if (is_not(localname, "EnumConst"))
1209  DmrppParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
1210 
1211  parser->pop_state();
1212  break;
1213 
1214  case inside_dim_def: {
1215  if (is_not(localname, "Dimension"))
1216  DmrppParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
1217 
1218  if (!parser->top_group())
1219  DmrppParserSax2::dmr_error(parser,
1220  "Expected a Group to be the current item, while finishing up an Dimension.");
1221 
1222  parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
1223  // Set the dim_def to null; next call to dim_def() will
1224  // allocate a new object. Calling 'clear' is important because
1225  // the cleanup method will free dim_def if it's not null and
1226  // we just copied the pointer in the add_dim_nocopy() call
1227  // above.
1228  parser->clear_dim_def();
1229  parser->pop_state();
1230  break;
1231  }
1232 
1233  case inside_simple_type:
1234  if (is_simple_type(get_type(localname))) {
1235  BaseType *btp = parser->top_basetype();
1236  parser->pop_basetype();
1237  parser->pop_attributes();
1238 
1239  BaseType *parent = 0;
1240  if (!parser->empty_basetype())
1241  parent = parser->top_basetype();
1242  else if (!parser->empty_group())
1243  parent = parser->top_group();
1244  else {
1245  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1246  localname);
1247  delete btp;
1248  parser->pop_state();
1249  break;
1250  }
1251 
1252  if (parent->type() == dods_array_c)
1253  static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
1254  else
1255  parent->add_var_nocopy(btp);
1256  }
1257  else
1258  DmrppParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1259 
1260  parser->pop_state();
1261  break;
1262 
1263  case inside_dim:
1264  if (is_not(localname, "Dim"))
1265  DmrppParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1266 
1267  parser->pop_state();
1268  break;
1269 
1270  case inside_map:
1271  if (is_not(localname, "Map"))
1272  DmrppParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1273 
1274  parser->pop_state();
1275  break;
1276 
1277  case inside_constructor: {
1278  if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1279  DmrppParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1280  return;
1281  }
1282 
1283  BaseType *btp = parser->top_basetype();
1284  parser->pop_basetype();
1285  parser->pop_attributes();
1286 
1287  BaseType *parent = 0;
1288  if (!parser->empty_basetype())
1289  parent = parser->top_basetype();
1290  else if (!parser->empty_group())
1291  parent = parser->top_group();
1292  else {
1293  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1294  localname);
1295  delete btp;
1296  parser->pop_state();
1297  break;
1298  }
1299 
1300  // TODO Why doesn't this code mirror the simple_var case and test
1301  // for the parent being an array? jhrg 10/13/13
1302  parent->add_var_nocopy(btp);
1303  parser->pop_state();
1304  break;
1305  }
1306 
1307  case not_dap4_element:
1308  if (parser->debug()) cerr << "End of non DAP4 element: " << localname << endl;
1309  parser->pop_state();
1310  break;
1311 
1312  case inside_dmrpp_object:
1313  if (parser->debug()) cerr << "End of dmrpp namespace element: " << localname << endl;
1314  parser->pop_state();
1315  break;
1316 
1317  case inside_dmrpp_chunkDimensionSizes_element: {
1318  if (parser->debug()) cerr << "End of chunkDimensionSizes element. localname: " << localname << endl;
1319 
1320  if (is_not(localname, "chunkDimensionSizes"))
1321  DmrppParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
1322  DmrppCommon *dc = dynamic_cast<DmrppCommon*>(parser->top_basetype()); // Get the Dmrpp common info
1323  if (!dc)
1324  throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
1325  string element_text(parser->char_data);
1326  if (parser->debug()) cerr << "chunkDimensionSizes element_text: '" << element_text << "'" << endl;
1327  dc->parse_chunk_dimension_sizes(element_text);
1328  parser->char_data = ""; // Null this after use.
1329  parser->pop_state();
1330  break;
1331  }
1332 
1333  case parser_unknown:
1334  parser->pop_state();
1335  break;
1336 
1337  case parser_error:
1338  case parser_fatal_error:
1339  break;
1340 
1341  case parser_end:
1342  // FIXME Error?
1343  break;
1344  }
1345 
1346  if (parser->debug()) cerr << "End element exit state: " << states[parser->get_state()] << endl;
1347 }
1348 
1352 void DmrppParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1353 {
1354  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1355 
1356  switch (parser->get_state()) {
1357  case inside_attribute_value:
1358  case inside_dmrpp_chunkDimensionSizes_element:
1359  parser->char_data.append((const char *) (ch), len);
1360  BESDEBUG(module, "Characters[" << parser->char_data.size() << "]" << parser->char_data << "'" << endl);
1361  break;
1362 
1363  case inside_other_xml_attribute:
1364  parser->other_xml.append((const char *) (ch), len);
1365  BESDEBUG(module, "Other XML Characters: '" << parser->other_xml << "'" << endl);
1366  break;
1367 
1368  default:
1369  break;
1370  }
1371 }
1372 
1377 void DmrppParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1378 {
1379  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1380 
1381  switch (parser->get_state()) {
1382  case inside_other_xml_attribute:
1383  parser->other_xml.append((const char *) (ch), len);
1384  break;
1385 
1386  default:
1387  break;
1388  }
1389 }
1390 
1396 void DmrppParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1397 {
1398  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1399 
1400  switch (parser->get_state()) {
1401  case inside_other_xml_attribute:
1402  parser->other_xml.append((const char *) (value), len);
1403  break;
1404 
1405  case parser_unknown:
1406  break;
1407 
1408  default:
1409  DmrppParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1410 
1411  break;
1412  }
1413 }
1414 
1419 xmlEntityPtr DmrppParserSax2::dmr_get_entity(void *, const xmlChar * name)
1420 {
1421  return xmlGetPredefinedEntity(name);
1422 }
1423 
1434 void DmrppParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1435 {
1436  va_list args;
1437  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1438 
1439  parser->push_state(parser_fatal_error);
1440 
1441  va_start(args, msg);
1442  char str[1024];
1443  vsnprintf(str, 1024, msg, args);
1444  va_end(args);
1445 
1446  int line = xmlSAX2GetLineNumber(parser->context);
1447 
1448  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1449  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1450 }
1451 
1452 void DmrppParserSax2::dmr_error(void *p, const char *msg, ...)
1453 {
1454  va_list args;
1455  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1456 
1457  parser->push_state(parser_error);
1458 
1459  va_start(args, msg);
1460  char str[1024];
1461  vsnprintf(str, 1024, msg, args);
1462  va_end(args);
1463 
1464  int line = xmlSAX2GetLineNumber(parser->context);
1465 
1466  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1467  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1468 }
1470 
1474 void DmrppParserSax2::cleanup_parse()
1475 {
1476  bool wellFormed = context->wellFormed;
1477  bool valid = context->valid;
1478 
1479  // context->sax = NULL;
1480  // Leak. Removed the above. jhrg 6/19/19
1481  xmlFreeParserCtxt(context);
1482 
1483  delete d_enum_def;
1484  d_enum_def = 0;
1485 
1486  delete d_dim_def;
1487  d_dim_def = 0;
1488 
1489  // If there's an error, there may still be items on the stack at the
1490  // end of the parse.
1491  while (!btp_stack.empty()) {
1492  delete top_basetype();
1493  pop_basetype();
1494  }
1495 
1496  if (!wellFormed)
1497  throw Error("The DMR was not well formed. " + error_msg);
1498  else if (!valid)
1499  throw Error("The DMR was not valid." + error_msg);
1500  else if (get_state() == parser_error)
1501  throw Error(error_msg);
1502  else if (get_state() == parser_fatal_error) throw InternalErr(error_msg);
1503 }
1504 
1519 void DmrppParserSax2::intern(istream &f, DMR *dest_dmr, bool debug)
1520 {
1521  d_debug = debug;
1522 
1523  // Code example from libxml2 docs re: read from a stream.
1524 
1525  if (!f.good()) throw Error("Input stream not open or read error");
1526  if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1527 
1528  d_dmr = dest_dmr; // dump values here
1529 
1530 #if 0
1531  int line_num = 1;
1532  string line;
1533 
1534  // Get the <xml ... ?> line
1535  getline(f, line);
1536  if (line.length() == 0) throw Error("No input found while parsing the DMR.");
1537 
1538  if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1539 
1540  context = xmlCreatePushParserCtxt(&dmrpp_sax_parser, this, line.c_str(), line.length(), "stream");
1541  context->validate = true;
1542  push_state(parser_start);
1543 
1544  // Get the first line of stuff
1545  getline(f, line);
1546  ++line_num;
1547 
1548  if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1549 
1550  while (!f.eof() && (get_state() != parser_end)) {
1551  xmlParseChunk(context, line.c_str(), line.length(), 0);
1552  // Get the next line
1553  getline(f, line);
1554  ++line_num;
1555  if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1556  }
1557 
1558  // This call ends the parse.
1559  xmlParseChunk(context, line.c_str(), 0, 1/*terminate*/);
1560 #else
1561  int line_num = 1;
1562  string line;
1563 
1564  // Get the XML prolog line (looks like: <?xml ... ?> )
1565  getline(f, line);
1566  if (line.length() == 0) throw Error("No input found while parsing the DMR.");
1567 
1568  if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1569 
1570  context = xmlCreatePushParserCtxt(&dmrpp_sax_parser, this, line.c_str(), line.length(), "stream");
1571  context->validate = true;
1572  push_state(parser_start);
1573 
1574  // Get the first chunk of the stuff
1575  long chunk_count = 0;
1576  long chunk_size = 0;
1577 
1578  f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1579  chunk_size=f.gcount();
1580  d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1581  if (debug) cerr << "chunk: (" << chunk_count++ << "): " << endl << d_parse_buffer << endl << endl;
1582 
1583  while(!f.eof() && (get_state() != parser_end)){
1584 
1585  xmlParseChunk(context, d_parse_buffer, chunk_size, 0);
1586 
1587  // There is more to read. Get the next chunk
1588  f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1589  chunk_size=f.gcount();
1590  d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1591  if (debug) cerr << "chunk: (" << chunk_count++ << "): " << endl << d_parse_buffer << endl << endl;
1592  }
1593 
1594  // This call ends the parse.
1595  xmlParseChunk(context, d_parse_buffer, chunk_size, 1/*terminate*/);
1596 #endif
1597 
1598  // This checks that the state on the parser stack is parser_end and throws
1599  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1600  cleanup_parse();
1601 }
1602 
1603 
1604 
1615 void DmrppParserSax2::intern(const string &document, DMR *dest_dmr, bool debug)
1616 {
1617  intern(document.c_str(), document.length(), dest_dmr, debug);
1618 }
1619 
1630 void DmrppParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug)
1631 {
1632  if (!(size > 0)) return;
1633 
1634  d_debug = debug;
1635 
1636  // Code example from libxml2 docs re: read from a stream.
1637 
1638  if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1639  d_dmr = dest_dmr; // dump values in dest_dmr
1640 
1641  push_state(parser_start);
1642  context = xmlCreatePushParserCtxt(&dmrpp_sax_parser, this, buffer, size, "stream");
1643  context->validate = true;
1644 
1645  // This call ends the parse.
1646  xmlParseChunk(context, buffer, 0, 1/*terminate*/);
1647 
1648  // This checks that the state on the parser stack is parser_end and throws
1649  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1650  cleanup_parse();
1651 }
1652 
1653 } // namespace dmrpp
BESCatalogUtils
Definition: BESCatalogUtils.h:61
BESCatalogList::default_catalog
virtual BESCatalog * default_catalog() const
The the default catalog.
Definition: BESCatalogList.h:118
BESUtil::assemblePath
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:821
Type
Type
Type of JSON value.
Definition: cmr_module/rapidjson/rapidjson.h:603
libdap
Definition: BESDapFunctionResponseCache.h:35
BESCatalogList::TheCatalogList
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
Definition: BESCatalogList.cc:81
dmrpp::DmrppParserSax2
Definition: DmrppParserSax2.h:62
BESInternalError
exception thrown if internal error encountered
Definition: BESInternalError.h:43
BESCatalog::get_catalog_utils
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition: BESCatalog.h:113
BESCatalogUtils::get_root_dir
const std::string & get_root_dir() const
Get the root directory of the catalog.
Definition: BESCatalogUtils.h:102
Error
BESCatalog
Catalogs provide a hierarchical organization for data.
Definition: BESCatalog.h:51