libdap  Updated for version 3.18.3
D4ParserSax2.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2012 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 //#define DODS_DEBUG 1
28 
29 #include <iostream>
30 #include <sstream>
31 
32 #include <cstring>
33 #include <cstdarg>
34 #include <cassert>
35 
36 #include <libxml/parserInternals.h>
37 
38 #include "DMR.h"
39 
40 #include "BaseType.h"
41 #include "Array.h"
42 #include "D4Group.h"
43 #include "D4Attributes.h"
44 #include "D4Maps.h"
45 #include "D4Enum.h"
46 #include "D4BaseTypeFactory.h"
47 
48 #include "DapXmlNamespaces.h"
49 #include "D4ParserSax2.h"
50 
51 #include "util.h"
52 #include "debug.h"
53 
54 
55 namespace libdap {
56 
/// Printable names of the parser states; used only when writing debugging
/// output (the table is indexed with the value returned by get_state()).
/// NOTE(review): the order presumably has to match the ParseState enum
/// declared in D4ParserSax2.h -- confirm whenever a state is added.
/// Both the strings and the table itself are immutable.
static const char *const states[] = {
    "parser_start",

    "inside_dataset",

    // inside_group is the state just after parsing the start of a Group
    // element.
    "inside_group",

    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",

    "inside_enum_def",
    "inside_enum_const",

    "inside_dim_def",

    // This covers Byte, ..., Url, Opaque
    "inside_simple_type",

    // "inside_array",
    "inside_dim",
    "inside_map",

    "inside_constructor",

    "not_dap4_element",

    "parser_unknown",
    "parser_error",
    "parser_fatal_error",

    "parser_end"
};
93 
/**
 * @brief Test whether an element name differs from an expected tag.
 *
 * @param name The element name to test.
 * @param tag The tag name to compare against.
 * @return True when the two strings differ, false when they are equal.
 * A null pointer never matches anything, so true is returned if either
 * argument is null (strcmp() must not be called with a null pointer).
 */
static bool is_not(const char *name, const char *tag)
{
    if (!name || !tag)
        return true;

    return strcmp(name, tag) != 0;
}
98 
107 D4EnumDef *
108 D4ParserSax2::enum_def()
109 {
110  if (!d_enum_def) d_enum_def = new D4EnumDef;
111 
112  return d_enum_def;
113 }
114 
121 D4Dimension *
122 D4ParserSax2::dim_def() {
123  if (!d_dim_def) d_dim_def = new D4Dimension;
124 
125  return d_dim_def;
126 }
127 
133 void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
134 {
135  if (!xml_attrs.empty())
136  xml_attrs.clear(); // erase old attributes
137 
138  // Make a value using the attribute name and the prefix, namespace URI
139  // and the value. The prefix might be null.
140  unsigned int index = 0;
141  for (int i = 0; i < nb_attributes; ++i, index += 5) {
142  xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *)attributes[index]),
143  XMLAttribute(attributes + index + 1)));
144 
145  DBG(cerr << "XML Attribute '" << (const char *)attributes[index] << "': "
146  << xml_attrs[(const char *)attributes[index]].value << endl);
147  }
148 }
149 
156 void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
157 {
158  // make a value with the prefix and namespace URI. The prefix might be null.
159  for (int i = 0; i < nb_namespaces; ++i) {
160  namespace_table.insert(map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *)namespaces[i * 2] : "",
161  (const char *)namespaces[i * 2 + 1]));
162  }
163 }
164 
171 bool D4ParserSax2::check_required_attribute(const string & attr)
172 {
173  if (xml_attrs.find(attr) == xml_attrs.end()) {
174  dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
175  return false;
176  }
177  else
178  return true;
179 }
180 
187 bool D4ParserSax2::check_attribute(const string & attr)
188 {
189  return (xml_attrs.find(attr) != xml_attrs.end());
190 }
191 
192 bool D4ParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
193 {
194  if (is_not(name, "Dimension"))
195  return false;
196 
197  transfer_xml_attrs(attrs, nb_attributes);
198 
199  if (!(check_required_attribute("name") && check_required_attribute("size"))) {
200  dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
201  return false;
202  }
203 
204  // This getter (dim_def) allocates a new object if needed.
205  dim_def()->set_name(xml_attrs["name"].value);
206  try {
207  dim_def()->set_size(xml_attrs["size"].value);
208  }
209  catch (Error &e) {
210  dmr_error(this, e.get_error_message().c_str());
211  return false;
212  }
213 
214  return true;
215 }
216 
234 bool D4ParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
235 {
236  if (is_not(name, "Dim"))
237  return false;
238 
239  transfer_xml_attrs(attrs, nb_attributes);
240 
241  if (check_attribute("size") && check_attribute("name")) {
242  dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
243  return false;
244  }
245  if (!(check_attribute("size") || check_attribute("name"))) {
246  dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
247  return false;
248  }
249 
250  if (!top_basetype()->is_vector_type()) {
251  // Make the top BaseType* an array
252  BaseType *b = top_basetype();
253  pop_basetype();
254 
255  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
256  a->set_is_dap4(true);
257  a->add_var_nocopy(b);
258  a->set_attributes_nocopy(b->attributes());
259  // trick: instead of popping b's attributes, copying them and then pushing
260  // a's copy, just move the pointer (but make sure there's only one object that
261  // references that pointer).
262  b->set_attributes_nocopy(0);
263 
264  push_basetype(a);
265  }
266 
267  assert(top_basetype()->is_vector_type());
268 
269  Array *a = static_cast<Array*>(top_basetype());
270  if (check_attribute("size")) {
271  a->append_dim(atoi(xml_attrs["size"].value.c_str())); // low budget code for now. jhrg 8/20/13
272  return true;
273  }
274  else if (check_attribute("name")) {
275  string name = xml_attrs["name"].value;
276 
277  D4Dimension *dim = 0;
278  if (name[0] == '/') // lookup the Dimension in the root group
279  dim = dmr()->root()->find_dim(name);
280  else // get enclosing Group and lookup Dimension there
281  dim = top_group()->find_dim(name);
282 
283  if (!dim)
284  throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.");
285  a->append_dim(dim);
286  return true;
287  }
288 
289  return false;
290 }
291 
292 bool D4ParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
293 {
294  if (is_not(name, "Map"))
295  return false;
296 
297  transfer_xml_attrs(attrs, nb_attributes);
298 
299  if (!check_attribute("name")) {
300  dmr_error(this, "The 'name' attribute must be used in a Map element.");
301  return false;
302  }
303 
304  if (!top_basetype()->is_vector_type()) {
305  // Make the top BaseType* an array
306  BaseType *b = top_basetype();
307  pop_basetype();
308 
309  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
310  a->set_is_dap4(true);
311  a->add_var_nocopy(b);
312  a->set_attributes_nocopy(b->attributes());
313  // trick: instead of popping b's attributes, copying them and then pushing
314  // a's copy, just move the pointer (but make sure there's only one object that
315  // references that pointer).
316  b->set_attributes_nocopy(0);
317 
318  push_basetype(a);
319  }
320 
321  assert(top_basetype()->is_vector_type());
322 
323  Array *a = static_cast<Array*>(top_basetype());
324 
325  string map_name = xml_attrs["name"].value;
326  if (xml_attrs["name"].value[0] != '/')
327  map_name = top_group()->FQN() + map_name;
328 
329  Array *map_source = 0; // The array variable that holds the data for the Map
330 
331  if (map_name[0] == '/') // lookup the Map in the root group
332  map_source = dmr()->root()->find_map_source(map_name);
333  else // get enclosing Group and lookup Map there
334  map_source = top_group()->find_map_source(map_name);
335 
336  // Change: If the parser is in 'strict' mode (the default) and the Array named by
337  // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
338  // mode), then this is not an error. However, the Array referenced by the Map will
339  // be null. This is a change in the parser's behavior to accommodate requests for
340  // Arrays that include Maps that do not also include the Map(s) in the request.
341  // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
342  if (!map_source && d_strict)
343  throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.");
344 
345  a->maps()->add_map(new D4Map(map_name, map_source));
346 
347  return true;
348 }
349 
350 bool D4ParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
351 {
352  if (is_not(name, "Group"))
353  return false;
354 
355  transfer_xml_attrs(attrs, nb_attributes);
356 
357  if (!check_required_attribute("name")) {
358  dmr_error(this, "The required attribute 'name' was missing from a Group element.");
359  return false;
360  }
361 
362  BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, xml_attrs["name"].value);
363  if (!btp) {
364  dmr_fatal_error(this, "Could not instantiate the Group '%s'.", xml_attrs["name"].value.c_str());
365  return false;
366  }
367 
368  D4Group *grp = static_cast<D4Group*>(btp);
369 
370  // Need to set this to get the D4Attribute behavior in the type classes
371  // shared between DAP2 and DAP4. jhrg 4/18/13
372  grp->set_is_dap4(true);
373 
374  // link it up and change the current group
375  D4Group *parent = top_group();
376  if (!parent) {
377  dmr_fatal_error(this, "No Group on the Group stack.");
378  return false;
379  }
380 
381  grp->set_parent(parent);
382  parent->add_group_nocopy(grp);
383 
384  push_group(grp);
385  push_attributes(grp->attributes());
386  return true;
387 }
388 
395 inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
396 {
397  if (is_not(name, "Attribute"))
398  return false;
399 
400  // These methods set the state to parser_error if a problem is found.
401  transfer_xml_attrs(attrs, nb_attributes);
402 
403  // add error
404  if (!(check_required_attribute(string("name")) && check_required_attribute(string("type")))) {
405  dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
406  return false;
407  }
408 
409  if (xml_attrs["type"].value == "Container") {
410  push_state(inside_attribute_container);
411 
412  DBG(cerr << "Pushing attribute container " << xml_attrs["name"].value << endl);
413  D4Attribute *child = new D4Attribute(xml_attrs["name"].value, attr_container_c);
414 
415  D4Attributes *tos = top_attributes();
416  // add return
417  if (!tos) {
418  delete child;
419  dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
420  return false;
421  }
422 
423  tos->add_attribute_nocopy(child);
424  push_attributes(child->attributes());
425  }
426  else if (xml_attrs["type"].value == "OtherXML") {
427  push_state(inside_other_xml_attribute);
428 
429  dods_attr_name = xml_attrs["name"].value;
430  dods_attr_type = xml_attrs["type"].value;
431  }
432  else {
433  push_state(inside_attribute);
434 
435  dods_attr_name = xml_attrs["name"].value;
436  dods_attr_type = xml_attrs["type"].value;
437  }
438 
439  return true;
440 }
441 
447 inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
448 {
449  if (is_not(name, "Enumeration"))
450  return false;
451 
452  transfer_xml_attrs(attrs, nb_attributes);
453 
454  if (!(check_required_attribute("name") && check_required_attribute("basetype"))) {
455  dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
456  return false;
457  }
458 
459  Type t = get_type(xml_attrs["basetype"].value.c_str());
460  if (!is_integer_type(t)) {
461  dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
462  xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str());
463  return false;
464  }
465 
466  // This getter allocates a new object if needed.
467  string enum_def_path = xml_attrs["name"].value;
468 #if 0
469  // Use FQNs when things are referenced, not when they are defined
470  if (xml_attrs["name"].value[0] != '/')
471  enum_def_path = top_group()->FQN() + enum_def_path;
472 #endif
473  enum_def()->set_name(enum_def_path);
474  enum_def()->set_type(t);
475 
476  return true;
477 }
478 
479 inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
480 {
481  if (is_not(name, "EnumConst"))
482  return false;
483 
484  // These methods set the state to parser_error if a problem is found.
485  transfer_xml_attrs(attrs, nb_attributes);
486 
487  if (!(check_required_attribute("name") && check_required_attribute("value"))) {
488  dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
489  return false;
490  }
491 
492  istringstream iss(xml_attrs["value"].value);
493  long long value = 0;
494  iss >> skipws >> value;
495  if (iss.fail() || iss.bad()) {
496  dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
497  xml_attrs["value"].value.c_str());
498  }
499  else if (!enum_def()->is_valid_enum_value(value)) {
500  dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
501  xml_attrs["value"].value.c_str(), D4type_name(d_enum_def->type()).c_str());
502  }
503  else {
504  // unfortunate choice of names... args are 'label' and 'value'
505  enum_def()->add_value(xml_attrs["name"].value, value);
506  }
507 
508  return true;
509 }
510 
516 inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
517 {
518  Type t = get_type(name);
519  if (is_simple_type(t)) {
520  process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
521  return true;
522  }
523  else {
524  switch(t) {
525  case dods_structure_c:
526  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
527  return true;
528 
529  case dods_sequence_c:
530  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
531  return true;
532 
533  default:
534  return false;
535  }
536  }
537 }
538 
546 void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
547 {
548  transfer_xml_attrs(attrs, nb_attributes);
549 
550  if (check_required_attribute("name")) {
551  BaseType *btp = dmr()->factory()->NewVariable(t, xml_attrs["name"].value);
552  if (!btp) {
553  dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
554  return;
555  }
556 
557  if ((t == dods_enum_c) && check_required_attribute("enum")) {
558  D4EnumDef *enum_def = 0;
559  string enum_path = xml_attrs["enum"].value;
560  if (enum_path[0] == '/')
561  enum_def = dmr()->root()->find_enum_def(enum_path);
562  else
563  enum_def = top_group()->find_enum_def(enum_path);
564 
565  if (!enum_def)
566  dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
567 
568  static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
569  }
570 
571  btp->set_is_dap4(true); // see comment above
572  push_basetype(btp);
573 
574  push_attributes(btp->attributes());
575 
576  push_state(s);
577  }
578 }
579 
586 
592 {
593  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
594  parser->error_msg = "";
595  parser->char_data = "";
596 
597  // Set this in intern_helper so that the loop test for the parser_end
598  // state works for the first iteration. It seems like XMLParseChunk calls this
599  // function on it's first run. jhrg 9/16/13
600  // parser->push_state(parser_start);
601 
602  parser->push_attributes(parser->dmr()->root()->attributes());
603 
604  if (parser->debug()) cerr << "Parser start state: " << states[parser->get_state()] << endl;
605 }
606 
610 {
611  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
612 
613  if (parser->debug()) cerr << "Parser end state: " << states[parser->get_state()] << endl;
614 
615  if (parser->get_state() != parser_end)
616  D4ParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
617 
618  // If we've found any sort of error, don't make the DMR; intern() will
619  // take care of the error.
620  if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
621  return;
622 
623  if (!parser->empty_basetype() || parser->empty_group())
624  D4ParserSax2::dmr_error(parser, "The document did not contain a valid root Group or contained unbalanced tags.");
625 
626  parser->pop_group(); // leave the stack 'clean'
627  parser->pop_attributes();
628 }
629 
643 void D4ParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
644  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/,
645  const xmlChar **attributes)
646 {
647  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
648  const char *localname = (const char *) l;
649 
650  if (parser->debug()) cerr << "Start element " << localname << " prefix: "<< (prefix?(char *)prefix:"null") << " ns: "<< (URI?(char *)URI:"null")
651  << " (state: " << states[parser->get_state()] << ")" << endl;
652 
653  if(parser->get_state() != parser_error){
654  string dap4_ns_name = DapXmlNamspaces::getDapNamespaceString(DAP_4_0);
655  if (parser->debug()) cerr << "dap4_ns_name: " << dap4_ns_name << endl;
656 
657  string this_element_ns_name((char *)URI);
658  if (parser->debug()) cerr << "this_element_ns_name: " << this_element_ns_name << endl;
659 
660  if(this_element_ns_name.compare(dap4_ns_name)){
661  if (parser->debug()) cerr << "Start of non DAP4 element: " << localname << " detected." << endl;
662  parser->push_state(not_dap4_element);
663  // return;
664  }
665  }
666 
667 
668  switch (parser->get_state()) {
669  case parser_start:
670  if (is_not(localname, "Dataset"))
671  D4ParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.", localname);
672 
673  parser->root_ns = URI ? (const char *) URI : "";
674  parser->transfer_xml_attrs(attributes, nb_attributes);
675 
676  if (parser->check_required_attribute(string("name")))
677  parser->dmr()->set_name(parser->xml_attrs["name"].value);
678 
679  if (parser->check_attribute("dapVersion"))
680  parser->dmr()->set_dap_version(parser->xml_attrs["dapVersion"].value);
681 
682  if (parser->check_attribute("dmrVersion"))
683  parser->dmr()->set_dmr_version(parser->xml_attrs["dmrVersion"].value);
684 
685  if (parser->check_attribute("base"))
686  parser->dmr()->set_request_xml_base(parser->xml_attrs["base"].value);
687 
688  if (!parser->root_ns.empty())
689  parser->dmr()->set_namespace(parser->root_ns);
690 
691  // Push the root Group on the stack
692  parser->push_group(parser->dmr()->root());
693 
694  parser->push_state(inside_dataset);
695 
696  break;
697 
698  // Both inside dataset and inside group can have the same stuff.
699  // The difference is that the Dataset holds the root group, which
700  // must be present; other groups are optional
701  case inside_dataset:
702  case inside_group:
703  if (parser->process_enum_def(localname, attributes, nb_attributes))
704  parser->push_state(inside_enum_def);
705  else if (parser->process_dimension_def(localname, attributes, nb_attributes))
706  parser->push_state(inside_dim_def);
707  else if (parser->process_group(localname, attributes, nb_attributes))
708  parser->push_state(inside_group);
709  else if (parser->process_variable(localname, attributes, nb_attributes))
710  // This will push either inside_simple_type or inside_structure
711  // onto the parser state stack.
712  break;
713  else if (parser->process_attribute(localname, attributes, nb_attributes))
714  // This will push either inside_attribute, inside_attribute_container
715  // or inside_otherxml_attribute onto the parser state stack
716  break;
717  else
718  D4ParserSax2::dmr_error(parser, "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.", localname);
719  break;
720 
721  case inside_attribute_container:
722  if (parser->process_attribute(localname, attributes, nb_attributes))
723  break;
724  else
725  D4ParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
726  break;
727 
728  case inside_attribute:
729  if (parser->process_attribute(localname, attributes, nb_attributes))
730  break;
731  else if (strcmp(localname, "Value") == 0)
732  parser->push_state(inside_attribute_value);
733  else
734  dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
735  break;
736 
737  case inside_attribute_value:
738  // Attribute values are processed by the end element code.
739  break;
740 
741  case inside_other_xml_attribute:
742  parser->other_xml_depth++;
743 
744  // Accumulate the elements here
745  parser->other_xml.append("<");
746  if (prefix) {
747  parser->other_xml.append((const char *) prefix);
748  parser->other_xml.append(":");
749  }
750  parser->other_xml.append(localname);
751 
752  if (nb_namespaces != 0) {
753  parser->transfer_xml_ns(namespaces, nb_namespaces);
754 
755  for (map<string, string>::iterator i = parser->namespace_table.begin();
756  i != parser->namespace_table.end(); ++i) {
757  parser->other_xml.append(" xmlns");
758  if (!i->first.empty()) {
759  parser->other_xml.append(":");
760  parser->other_xml.append(i->first);
761  }
762  parser->other_xml.append("=\"");
763  parser->other_xml.append(i->second);
764  parser->other_xml.append("\"");
765  }
766  }
767 
768  if (nb_attributes != 0) {
769  parser->transfer_xml_attrs(attributes, nb_attributes);
770  for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
771  parser->other_xml.append(" ");
772  if (!i->second.prefix.empty()) {
773  parser->other_xml.append(i->second.prefix);
774  parser->other_xml.append(":");
775  }
776  parser->other_xml.append(i->first);
777  parser->other_xml.append("=\"");
778  parser->other_xml.append(i->second.value);
779  parser->other_xml.append("\"");
780  }
781  }
782 
783  parser->other_xml.append(">");
784  break;
785 
786  case inside_enum_def:
787  // process an EnumConst element
788  if (parser->process_enum_const(localname, attributes, nb_attributes))
789  parser->push_state(inside_enum_const);
790  else
791  dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
792  break;
793 
794  case inside_enum_const:
795  // No content; nothing to do
796  break;
797 
798  case inside_dim_def:
799  // No content; nothing to do
800  break;
801 #if 0
802  case inside_dimension:
803  // No content.
804  break;
805 #endif
806  case inside_dim:
807  // No content.
808  break;
809 
810  case inside_map:
811  // No content.
812  break;
813 
814  case inside_simple_type:
815  if (parser->process_attribute(localname, attributes, nb_attributes))
816  break;
817  else if (parser->process_dimension(localname, attributes, nb_attributes))
818  parser->push_state(inside_dim);
819  else if (parser->process_map(localname, attributes, nb_attributes))
820  parser->push_state(inside_map);
821  else
822  dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
823  break;
824 
825  case inside_constructor:
826  if (parser->process_variable(localname, attributes, nb_attributes))
827  // This will push either inside_simple_type or inside_structure
828  // onto the parser state stack.
829  break;
830  else if (parser->process_attribute(localname, attributes, nb_attributes))
831  break;
832  else if (parser->process_dimension(localname, attributes, nb_attributes))
833  parser->push_state(inside_dim);
834  else if (parser->process_map(localname, attributes, nb_attributes))
835  parser->push_state(inside_map);
836  else
837  D4ParserSax2::dmr_error(parser, "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
838  break;
839 
840  case not_dap4_element:
841  if (parser->debug()) cerr << "Inside non DAP4 element. localname: " << localname << endl;
842  break;
843 
844  case parser_unknown:
845  // FIXME?
846  // *** Never used? If so remove/error
847  parser->push_state(parser_unknown);
848  break;
849 
850  case parser_error:
851  case parser_fatal_error:
852  break;
853 
854  case parser_end:
855  // FIXME Error?
856  break;
857  }
858 
859  if (parser->debug()) cerr << "Start element exit state: " << states[parser->get_state()] << endl;
860 }
861 
862 void D4ParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
863 {
864  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
865  const char *localname = (const char *) l;
866 
867  if (parser->debug())
868  cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
869 
870  switch (parser->get_state()) {
871  case parser_start:
872  dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
873  break;
874 
875  case inside_dataset:
876  if (is_not(localname, "Dataset"))
877  D4ParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
878 
879  parser->pop_state();
880  if (parser->get_state() != parser_start)
881  dmr_fatal_error(parser, "Unexpected state, expected start state.");
882  else {
883  parser->pop_state();
884  parser->push_state(parser_end);
885  }
886  break;
887 
888  case inside_group: {
889  if (is_not(localname, "Group"))
890  D4ParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
891 
892  if (!parser->empty_basetype() || parser->empty_group())
893  D4ParserSax2::dmr_error(parser,
894  "The document did not contain a valid root Group or contained unbalanced tags.");
895 
896  parser->pop_group();
897  parser->pop_state();
898  break;
899  }
900 
901  case inside_attribute_container:
902  if (is_not(localname, "Attribute"))
903  D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
904 
905  parser->pop_state();
906  parser->pop_attributes();
907  break;
908 
909  case inside_attribute:
910  if (is_not(localname, "Attribute"))
911  D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
912 
913  parser->pop_state();
914  break;
915 
916  case inside_attribute_value: {
917  if (is_not(localname, "Value"))
918  D4ParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
919 
920  parser->pop_state();
921 
922  // The old code added more values using the name and type as
923  // indexes to find the correct attribute. Use get() for that
924  // now. Or fix this code to keep a pointer to the to attribute...
925  D4Attributes *attrs = parser->top_attributes();
926  D4Attribute *attr = attrs->get(parser->dods_attr_name);
927  if (!attr) {
928  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
929  attrs->add_attribute_nocopy(attr);
930  }
931  attr->add_value(parser->char_data);
932 
933  parser->char_data = ""; // Null this after use.
934  break;
935  }
936 
937  case inside_other_xml_attribute: {
938  if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
939  parser->pop_state();
940 
941  // The old code added more values using the name and type as
942  // indexes to find the correct attribute. Use get() for that
943  // now. Or fix this code to keep a pointer to the to attribute...
944  D4Attributes *attrs = parser->top_attributes();
945  D4Attribute *attr = attrs->get(parser->dods_attr_name);
946  if (!attr) {
947  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
948  attrs->add_attribute_nocopy(attr);
949  }
950  attr->add_value(parser->other_xml);
951 
952  parser->other_xml = ""; // Null this after use.
953  }
954  else {
955  if (parser->other_xml_depth == 0) {
956  D4ParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
957  localname);
958  break;
959  }
960  parser->other_xml_depth--;
961 
962  parser->other_xml.append("</");
963  if (prefix) {
964  parser->other_xml.append((const char *) prefix);
965  parser->other_xml.append(":");
966  }
967  parser->other_xml.append(localname);
968  parser->other_xml.append(">");
969  }
970  break;
971  }
972 
973  case inside_enum_def:
974  if (is_not(localname, "Enumeration"))
975  D4ParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
976  if (!parser->top_group())
978  "Expected a Group to be the current item, while finishing up an Enumeration.");
979  else {
980  // copy the pointer; not a deep copy
981  parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
982  // Set the enum_def to null; next call to enum_def() will
983  // allocate a new object
984  parser->clear_enum_def();
985  parser->pop_state();
986  }
987  break;
988 
989  case inside_enum_const:
990  if (is_not(localname, "EnumConst"))
991  D4ParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
992 
993  parser->pop_state();
994  break;
995 
996  case inside_dim_def: {
997  if (is_not(localname, "Dimension"))
998  D4ParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
999 
1000  if (!parser->top_group())
1001  D4ParserSax2::dmr_error(parser,
1002  "Expected a Group to be the current item, while finishing up an Dimension.");
1003 
1004  // FIXME Use the Group on the top of the group stack
1005  // copy the pointer; not a deep copy
1006  parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
1007  //parser->dmr()->root()->dims()->add_dim_nocopy(parser->dim_def());
1008  // Set the dim_def to null; next call to dim_def() will
1009  // allocate a new object. Calling 'clear' is important because
1010  // the cleanup method will free dim_def if it's not null and
1011  // we just copied the pointer in the add_dim_nocopy() call
1012  // above.
1013  parser->clear_dim_def();
1014  parser->pop_state();
1015  break;
1016  }
1017 
1018  case inside_simple_type:
1019  if (is_simple_type(get_type(localname))) {
1020  BaseType *btp = parser->top_basetype();
1021  parser->pop_basetype();
1022  parser->pop_attributes();
1023 
1024  BaseType *parent = 0;
1025  if (!parser->empty_basetype())
1026  parent = parser->top_basetype();
1027  else if (!parser->empty_group())
1028  parent = parser->top_group();
1029  else {
1030  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1031  localname);
1032  delete btp;
1033  parser->pop_state();
1034  break;
1035  }
1036 
1037  if (parent->type() == dods_array_c)
1038  static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
1039  else
1040  parent->add_var_nocopy(btp);
1041  }
1042  else
1043  D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1044 
1045  parser->pop_state();
1046  break;
1047 
1048  case inside_dim:
1049  if (is_not(localname, "Dim"))
1050  D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1051 
1052  parser->pop_state();
1053  break;
1054 
1055  case inside_map:
1056  if (is_not(localname, "Map"))
1057  D4ParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1058 
1059  parser->pop_state();
1060  break;
1061 
1062  case inside_constructor: {
1063  if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1064  D4ParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1065  return;
1066  }
1067 
1068  BaseType *btp = parser->top_basetype();
1069  parser->pop_basetype();
1070  parser->pop_attributes();
1071 
1072  BaseType *parent = 0;
1073  if (!parser->empty_basetype())
1074  parent = parser->top_basetype();
1075  else if (!parser->empty_group())
1076  parent = parser->top_group();
1077  else {
1078  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1079  localname);
1080  delete btp;
1081  parser->pop_state();
1082  break;
1083  }
1084 
1085  // TODO Why doesn't this code mirror the simple_var case and test
1086  // for the parent being an array? jhrg 10/13/13
1087  parent->add_var_nocopy(btp);
1088  parser->pop_state();
1089  break;
1090  }
1091 
1092  case not_dap4_element:
1093  if (parser->debug()) cerr << "End of non DAP4 element: " << localname << endl;
1094  parser->pop_state();
1095  break;
1096 
1097  case parser_unknown:
1098  parser->pop_state();
1099  break;
1100 
1101  case parser_error:
1102  case parser_fatal_error:
1103  break;
1104 
1105  case parser_end:
1106  // FIXME Error?
1107  break;
1108  }
1109 
1110  if (parser->debug()) cerr << "End element exit state: " << states[parser->get_state()] << endl;
1111 }
1112 
1116 void D4ParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1117 {
1118  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1119 
1120  switch (parser->get_state()) {
1121  case inside_attribute_value:
1122  parser->char_data.append((const char *) (ch), len);
1123  DBG(cerr << "Characters: '" << parser->char_data << "'" << endl);
1124  break;
1125 
1126  case inside_other_xml_attribute:
1127  parser->other_xml.append((const char *) (ch), len);
1128  DBG(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1129  break;
1130 
1131  default:
1132  break;
1133  }
1134 }
1135 
1140 void D4ParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1141 {
1142  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1143 
1144  switch (parser->get_state()) {
1145  case inside_other_xml_attribute:
1146  parser->other_xml.append((const char *) (ch), len);
1147  break;
1148 
1149  default:
1150  break;
1151  }
1152 }
1153 
1159 void D4ParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1160 {
1161  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1162 
1163  switch (parser->get_state()) {
1164  case inside_other_xml_attribute:
1165  parser->other_xml.append((const char *) (value), len);
1166  break;
1167 
1168  case parser_unknown:
1169  break;
1170 
1171  default:
1172  D4ParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1173 
1174  break;
1175  }
1176 }
1177 
1182 xmlEntityPtr D4ParserSax2::dmr_get_entity(void *, const xmlChar * name)
1183 {
1184  return xmlGetPredefinedEntity(name);
1185 }
1186 
1197 void D4ParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1198 {
1199  va_list args;
1200  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1201 
1202  parser->push_state(parser_fatal_error);
1203 
1204  va_start(args, msg);
1205  char str[1024];
1206  vsnprintf(str, 1024, msg, args);
1207  va_end(args);
1208 
1209  int line = xmlSAX2GetLineNumber(parser->context);
1210 
1211  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1212  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1213 }
1214 
1215 void D4ParserSax2::dmr_error(void *p, const char *msg, ...)
1216 {
1217  va_list args;
1218  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1219 
1220  parser->push_state(parser_error);
1221 
1222  va_start(args, msg);
1223  char str[1024];
1224  vsnprintf(str, 1024, msg, args);
1225  va_end(args);
1226 
1227  int line = xmlSAX2GetLineNumber(parser->context);
1228 
1229  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1230  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1231 }
1233 
1237 void D4ParserSax2::cleanup_parse()
1238 {
1239  bool wellFormed = context->wellFormed;
1240  bool valid = context->valid;
1241 
1242  context->sax = NULL;
1243  xmlFreeParserCtxt(context);
1244 
1245  delete d_enum_def;
1246  d_enum_def = 0;
1247 
1248  delete d_dim_def;
1249  d_dim_def = 0;
1250 
1251  // If there's an error, there may still be items on the stack at the
1252  // end of the parse.
1253  while (!btp_stack.empty()) {
1254  delete top_basetype();
1255  pop_basetype();
1256  }
1257 
1258  if (!wellFormed)
1259  throw Error("The DMR was not well formed. " + error_msg);
1260  else if (!valid)
1261  throw Error("The DMR was not valid." + error_msg);
1262  else if (get_state() == parser_error)
1263  throw Error(error_msg);
1264  else if (get_state() == parser_fatal_error)
1265  throw InternalErr(error_msg);
1266 }
1267 
1282 void D4ParserSax2::intern(istream &f, DMR *dest_dmr, bool debug)
1283 {
1284  d_debug = debug;
1285 
1286  // Code example from libxml2 docs re: read from a stream.
1287 
1288  if (!f.good())
1289  throw Error("Input stream not open or read error");
1290  if (!dest_dmr)
1291  throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1292 
1293  d_dmr = dest_dmr; // dump values here
1294 
1295  const int size = 1024;
1296  char chars[size];
1297  int line = 1;
1298 
1299  f.getline(chars, size);
1300  int res = f.gcount();
1301  if (res == 0) throw Error("No input found while parsing the DMR.");
1302 
1303  if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
1304 
1305  context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, chars, res - 1, "stream");
1306  context->validate = true;
1307  push_state(parser_start);
1308 
1309  f.getline(chars, size);
1310  while ((f.gcount() > 0) && (get_state() != parser_end)) {
1311  if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
1312  xmlParseChunk(context, chars, f.gcount() - 1, 0);
1313  f.getline(chars, size);
1314  }
1315 
1316  // This call ends the parse.
1317  xmlParseChunk(context, chars, 0, 1/*terminate*/);
1318 
1319  // This checks that the state on the parser stack is parser_end and throws
1320  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1321  cleanup_parse();
1322 }
1323 
1334 void D4ParserSax2::intern(const string &document, DMR *dest_dmr, bool debug)
1335 {
1336  intern(document.c_str(), document.length(), dest_dmr, debug);
1337 }
1338 
1349 void D4ParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug)
1350 {
1351  if (!(size > 0)) return;
1352 
1353  d_debug = debug;
1354 
1355  // Code example from libxml2 docs re: read from a stream.
1356 
1357  if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1358  d_dmr = dest_dmr; // dump values in dest_dmr
1359 
1360  push_state(parser_start);
1361  context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, buffer, size, "stream");
1362  context->validate = true;
1363  //push_state(parser_start);
1364  //xmlParseChunk(context, buffer, size, 0);
1365 
1366  // This call ends the parse.
1367  xmlParseChunk(context, buffer, 0, 1/*terminate*/);
1368 
1369  // This checks that the state on the parser stack is parser_end and throws
1370  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1371  cleanup_parse();
1372 }
1373 
1374 } // namespace libdap
virtual BaseType * NewVariable(Type t, const string &name) const
static void dmr_end_document(void *parser)
void set_namespace(const string &ns)
Set the namespace for this DDS/DDX object/response.
Definition: DMR.h:158
bool is_valid_enum_value(long long value)
Definition: D4EnumDefs.cc:43
D4Dimension * find_dim(const string &path)
Find the dimension using a path. Using the DAP4 name syntax, lookup a dimension. The dimension must b...
Definition: D4Group.cc:274
D4Group * root()
Definition: DMR.cc:243
static void dmr_start_document(void *parser)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
bool is_vector_type(Type t)
Returns true if the instance is a vector (i.e., array) type variable.
Definition: util.cc:816
void set_request_xml_base(const string &xb)
Definition: DMR.h:152
Type
Identifies the data type.
Definition: Type.h:94
A class for software fault reporting.
Definition: InternalErr.h:64
static std::string getDapNamespaceString(DAPVersion version)
static void dmr_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes)
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
void add_dim_nocopy(D4Dimension *dim)
Definition: D4Dimensions.h:160
virtual D4BaseTypeFactory * factory()
Definition: DMR.h:125
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
virtual Type type() const
Returns the type of the class instance.
Definition: BaseType.cc:310
ObjectType get_type(const string &value)
Definition: mime_util.cc:326
string D4type_name(Type t)
Returns the type of the class instance as a string. Supports all DAP4 types and not the DAP2-only typ...
Definition: util.cc:693
bool is_simple_type(Type t)
Returns true if the instance is a numeric, string or URL type variable.
Definition: util.cc:774
virtual D4Attributes * attributes()
Definition: BaseType.cc:544
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
The basic data type for the DODS DAP types.
Definition: BaseType.h:117
void set_dap_version(const string &version_string)
Definition: DMR.cc:255
virtual std::string FQN() const
Definition: D4Group.cc:182
D4Attribute * get(const string &fqn)
void add_map(D4Map *map)
Definition: D4Maps.h:115
A class for error processing.
Definition: Error.h:90
D4EnumDefs * enum_defs()
Get the enumerations defined for this Group.
Definition: D4Group.h:97
static void dmr_fatal_error(void *parser, const char *msg,...)
bool is_integer_type(Type t)
Definition: util.cc:905
D4Dimensions * dims()
Get the dimensions defined for this Group.
Definition: D4Group.h:82