libdap Updated for version 3.20.10
libdap4 is an implementation of OPeNDAP's DAP protocol.
D4ParserSax2.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of libdap, A C++ implementation of the OPeNDAP Data
4// Access Protocol.
5
6// Copyright (c) 2012 OPeNDAP, Inc.
7// Author: James Gallagher <jgallagher@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25#include "config.h"
26
27//#define DODS_DEBUG 1
28
29#include <iostream>
30#include <sstream>
31
32#include <cstring>
33#include <cstdarg>
34#include <cassert>
35
36#include <libxml2/libxml/parserInternals.h>
37
38#include "DMR.h"
39
40#include "BaseType.h"
41#include "Array.h"
42#include "D4Group.h"
43#include "D4Attributes.h"
44#include "D4Maps.h"
45#include "D4Enum.h"
46#include "D4BaseTypeFactory.h"
47
48#include "DapXmlNamespaces.h"
49#include "D4ParserSax2.h"
50
51#include "util.h"
52#include "debug.h"
53
54namespace libdap {
55
// Human-readable names of the parser states. The table is indexed with the
// value returned by get_state() when the parser prints debugging output, so
// the entries must stay in this order.
static const char *states[] = {
    "parser_start",

    "inside_dataset",
    "inside_group",                 // just after the start of a Group element

    // Attributes
    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",

    // Enumeration definitions
    "inside_enum_def",
    "inside_enum_const",

    // Dimension definitions
    "inside_dim_def",

    // Variables: the simple types are Byte, ..., Url, Opaque.
    // (There is no "inside_array" state.)
    "inside_simple_type",
    "inside_dim",
    "inside_map",
    "inside_constructor",

    "not_dap4_element",

    // Bookkeeping and error states
    "parser_unknown",
    "parser_error",
    "parser_fatal_error",

    "parser_end"
};
92
// Return true when the element name `name` differs from `tag`
// (case-sensitive comparison); false when the two strings are equal.
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
97
106D4EnumDef *
107D4ParserSax2::enum_def()
108{
109 if (!d_enum_def) d_enum_def = new D4EnumDef;
110
111 return d_enum_def;
112}
113
120D4Dimension *
121D4ParserSax2::dim_def() {
122 if (!d_dim_def) d_dim_def = new D4Dimension;
123
124 return d_dim_def;
125}
126
132void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
133{
134 if (!xml_attrs.empty())
135 xml_attrs.clear(); // erase old attributes
136
137 // Make a value using the attribute name and the prefix, namespace URI
138 // and the value. The prefix might be null.
139 unsigned int index = 0;
140 for (int i = 0; i < nb_attributes; ++i, index += 5) {
141 xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *)attributes[index]),
142 XMLAttribute(attributes + index + 1)));
143
144 DBG(cerr << "XML Attribute '" << (const char *)attributes[index] << "': "
145 << xml_attrs[(const char *)attributes[index]].value << endl);
146 }
147}
148
155void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
156{
157 // make a value with the prefix and namespace URI. The prefix might be null.
158 for (int i = 0; i < nb_namespaces; ++i) {
159 namespace_table.insert(map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *)namespaces[i * 2] : "",
160 (const char *)namespaces[i * 2 + 1]));
161 }
162}
163
170bool D4ParserSax2::check_required_attribute(const string & attr)
171{
172 if (xml_attrs.find(attr) == xml_attrs.end()) {
173 dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
174 return false;
175 }
176 else
177 return true;
178}
179
186bool D4ParserSax2::check_attribute(const string & attr)
187{
188 return (xml_attrs.find(attr) != xml_attrs.end());
189}
190
191bool D4ParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
192{
193 if (is_not(name, "Dimension"))
194 return false;
195
196 transfer_xml_attrs(attrs, nb_attributes);
197
198 if (!(check_required_attribute("name") && check_required_attribute("size"))) {
199 dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
200 return false;
201 }
202
203 // This getter (dim_def) allocates a new object if needed.
204 dim_def()->set_name(xml_attrs["name"].value);
205 try {
206 dim_def()->set_size(xml_attrs["size"].value);
207 }
208 catch (Error &e) {
209 dmr_error(this, "%s", e.get_error_message().c_str());
210 return false;
211 }
212
213 return true;
214}
215
233bool D4ParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
234{
235 if (is_not(name, "Dim"))
236 return false;
237
238 transfer_xml_attrs(attrs, nb_attributes);
239
240 if (check_attribute("size") && check_attribute("name")) {
241 dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
242 return false;
243 }
244 if (!(check_attribute("size") || check_attribute("name"))) {
245 dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
246 return false;
247 }
248
249 if (!top_basetype()->is_vector_type()) {
250 // Make the top BaseType* an array
251 BaseType *b = top_basetype();
252 pop_basetype();
253
254 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
255 a->set_is_dap4(true);
256 a->add_var_nocopy(b);
257 a->set_attributes_nocopy(b->attributes());
258 // trick: instead of popping b's attributes, copying them and then pushing
259 // a's copy, just move the pointer (but make sure there's only one object that
260 // references that pointer).
261 b->set_attributes_nocopy(0);
262
263 push_basetype(a);
264 }
265
266 assert(top_basetype()->is_vector_type());
267
268 Array *a = static_cast<Array*>(top_basetype());
269 if (check_attribute("size")) {
270 a->append_dim(atoi(xml_attrs["size"].value.c_str())); // low budget code for now. jhrg 8/20/13
271 return true;
272 }
273 else if (check_attribute("name")) {
274 string name = xml_attrs["name"].value;
275
276 D4Dimension *dim = 0;
277 if (name[0] == '/') // lookup the Dimension in the root group
278 dim = dmr()->root()->find_dim(name);
279 else // get enclosing Group and lookup Dimension there
280 dim = top_group()->find_dim(name);
281
282 if (!dim)
283 throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.");
284 a->append_dim(dim);
285 return true;
286 }
287
288 return false;
289}
290
291bool D4ParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
292{
293 if (is_not(name, "Map"))
294 return false;
295
296 transfer_xml_attrs(attrs, nb_attributes);
297
298 if (!check_attribute("name")) {
299 dmr_error(this, "The 'name' attribute must be used in a Map element.");
300 return false;
301 }
302
303 if (!top_basetype()->is_vector_type()) {
304 // Make the top BaseType* an array
305 BaseType *b = top_basetype();
306 pop_basetype();
307
308 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
309 a->set_is_dap4(true);
310 a->add_var_nocopy(b);
311 a->set_attributes_nocopy(b->attributes());
312 // trick: instead of popping b's attributes, copying them and then pushing
313 // a's copy, just move the pointer (but make sure there's only one object that
314 // references that pointer).
315 b->set_attributes_nocopy(0);
316
317 push_basetype(a);
318 }
319
320 assert(top_basetype()->is_vector_type());
321
322 Array *a = static_cast<Array*>(top_basetype());
323
324 string map_name = xml_attrs["name"].value;
325 if (xml_attrs["name"].value[0] != '/')
326 map_name = top_group()->FQN() + map_name;
327
328 Array *map_source = 0; // The array variable that holds the data for the Map
329
330 if (map_name[0] == '/') // lookup the Map in the root group
331 map_source = dmr()->root()->find_map_source(map_name);
332 else // get enclosing Group and lookup Map there
333 map_source = top_group()->find_map_source(map_name);
334
335 // Change: If the parser is in 'strict' mode (the default) and the Array named by
336 // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
337 // mode), then this is not an error. However, the Array referenced by the Map will
338 // be null. This is a change in the parser's behavior to accommodate requests for
339 // Arrays that include Maps that do not also include the Map(s) in the request.
340 // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
341 if (!map_source && d_strict)
342 throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.");
343
344 a->maps()->add_map(new D4Map(map_name, map_source));
345
346 return true;
347}
348
349bool D4ParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
350{
351 if (is_not(name, "Group"))
352 return false;
353
354 transfer_xml_attrs(attrs, nb_attributes);
355
356 if (!check_required_attribute("name")) {
357 dmr_error(this, "The required attribute 'name' was missing from a Group element.");
358 return false;
359 }
360
361 BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, xml_attrs["name"].value);
362 if (!btp) {
363 dmr_fatal_error(this, "Could not instantiate the Group '%s'.", xml_attrs["name"].value.c_str());
364 return false;
365 }
366
367 D4Group *grp = static_cast<D4Group*>(btp);
368
369 // Need to set this to get the D4Attribute behavior in the type classes
370 // shared between DAP2 and DAP4. jhrg 4/18/13
371 grp->set_is_dap4(true);
372
373 // link it up and change the current group
374 D4Group *parent = top_group();
375 if (!parent) {
376 dmr_fatal_error(this, "No Group on the Group stack.");
377 return false;
378 }
379
380 grp->set_parent(parent);
381 parent->add_group_nocopy(grp);
382
383 push_group(grp);
384 push_attributes(grp->attributes());
385 return true;
386}
387
394inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
395{
396 if (is_not(name, "Attribute"))
397 return false;
398
399 // These methods set the state to parser_error if a problem is found.
400 transfer_xml_attrs(attrs, nb_attributes);
401
402 // add error
403 if (!(check_required_attribute(string("name")) && check_required_attribute(string("type")))) {
404 dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
405 return false;
406 }
407
408 if (xml_attrs["type"].value == "Container") {
409 push_state(inside_attribute_container);
410
411 DBG(cerr << "Pushing attribute container " << xml_attrs["name"].value << endl);
412 D4Attribute *child = new D4Attribute(xml_attrs["name"].value, attr_container_c);
413
414 D4Attributes *tos = top_attributes();
415 // add return
416 if (!tos) {
417 delete child;
418 dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
419 return false;
420 }
421
422 tos->add_attribute_nocopy(child);
423 push_attributes(child->attributes());
424 }
425 else if (xml_attrs["type"].value == "OtherXML") {
426 push_state(inside_other_xml_attribute);
427
428 dods_attr_name = xml_attrs["name"].value;
429 dods_attr_type = xml_attrs["type"].value;
430 }
431 else {
432 push_state(inside_attribute);
433
434 dods_attr_name = xml_attrs["name"].value;
435 dods_attr_type = xml_attrs["type"].value;
436 }
437
438 return true;
439}
440
446inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
447{
448 if (is_not(name, "Enumeration"))
449 return false;
450
451 transfer_xml_attrs(attrs, nb_attributes);
452
453 if (!(check_required_attribute("name") && check_required_attribute("basetype"))) {
454 dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
455 return false;
456 }
457
458 Type t = get_type(xml_attrs["basetype"].value.c_str());
459 if (!is_integer_type(t)) {
460 dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
461 xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str());
462 return false;
463 }
464
465 // This getter allocates a new object if needed.
466 string enum_def_path = xml_attrs["name"].value;
467#if 0
468 // Use FQNs when things are referenced, not when they are defined
469 if (xml_attrs["name"].value[0] != '/')
470 enum_def_path = top_group()->FQN() + enum_def_path;
471#endif
472 enum_def()->set_name(enum_def_path);
473 enum_def()->set_type(t);
474
475 return true;
476}
477
478inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
479{
480 if (is_not(name, "EnumConst"))
481 return false;
482
483 // These methods set the state to parser_error if a problem is found.
484 transfer_xml_attrs(attrs, nb_attributes);
485
486 if (!(check_required_attribute("name") && check_required_attribute("value"))) {
487 dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
488 return false;
489 }
490
491 istringstream iss(xml_attrs["value"].value);
492 long long value = 0;
493 iss >> skipws >> value;
494 if (iss.fail() || iss.bad()) {
495 dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
496 xml_attrs["value"].value.c_str());
497 }
498 else if (!enum_def()->is_valid_enum_value(value)) {
499 dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
500 xml_attrs["value"].value.c_str(), D4type_name(d_enum_def->type()).c_str());
501 }
502 else {
503 // unfortunate choice of names... args are 'label' and 'value'
504 enum_def()->add_value(xml_attrs["name"].value, value);
505 }
506
507 return true;
508}
509
515inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
516{
517 Type t = get_type(name);
518 if (is_simple_type(t)) {
519 process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
520 return true;
521 }
522 else {
523 switch(t) {
524 case dods_structure_c:
525 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
526 return true;
527
528 case dods_sequence_c:
529 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
530 return true;
531
532 default:
533 return false;
534 }
535 }
536}
537
545void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
546{
547 transfer_xml_attrs(attrs, nb_attributes);
548
549 if (check_required_attribute("name")) {
550 BaseType *btp = dmr()->factory()->NewVariable(t, xml_attrs["name"].value);
551 if (!btp) {
552 dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
553 return;
554 }
555
556 if ((t == dods_enum_c) && check_required_attribute("enum")) {
557 D4EnumDef *enum_def = 0;
558 string enum_path = xml_attrs["enum"].value;
559 if (enum_path[0] == '/')
560 enum_def = dmr()->root()->find_enum_def(enum_path);
561 else
562 enum_def = top_group()->find_enum_def(enum_path);
563
564 if (!enum_def)
565 dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
566
567 static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
568 }
569
570 btp->set_is_dap4(true); // see comment above
571 push_basetype(btp);
572
573 push_attributes(btp->attributes());
574
575 push_state(s);
576 }
577}
578
585
// NOTE(review): the line that carries this function's signature is not
// present in this listing. The body takes a 'void *p' that is cast to the
// parser object, which suggests this is the SAX start-of-document callback
// -- confirm against the class declaration.
//
// Resets the parser's error message and character-data buffer and pushes the
// attribute table of the DMR's root group on the attribute stack. When
// debugging is on, the current parser state is printed to cerr.
591{
592 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
593 parser->d_error_msg = "";
594 parser->char_data = "";
595
596 // Set this in intern_helper so that the loop test for the parser_end
597 // state works for the first iteration. It seems like XMLParseChunk calls this
598 // function on its first run. jhrg 9/16/13
599 // parser->push_state(parser_start);
600
601 parser->push_attributes(parser->dmr()->root()->attributes());
602
603 if (parser->debug()) cerr << "Parser start state: " << states[parser->get_state()] << endl;
604}
605
// NOTE(review): the line that carries this function's signature is not
// present in this listing. The body takes a 'void *p' that is cast to the
// parser object, which suggests this is the SAX end-of-document callback
// -- confirm against the class declaration.
//
// Checks that parsing finished in the parser_end state and that the variable
// and group stacks are in the expected shape, then pops the group and the
// attribute table that are still on their stacks.
609{
610 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
611
612 if (parser->debug()) cerr << "Parser end state: " << states[parser->get_state()] << endl;
613
// Any state other than parser_end here is reported as unbalanced tags.
614 if (parser->get_state() != parser_end)
615 D4ParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
616
617 // If we've found any sort of error, don't make the DMR; intern() will
618 // take care of the error.
619 if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
620 return;
621
// At this point no variable should be left on the variable stack and at
// least one group should still be on the group stack.
622 if (!parser->empty_basetype() || parser->empty_group())
623 D4ParserSax2::dmr_error(parser, "The document did not contain a valid root Group or contained unbalanced tags.");
624
// NOTE(review): the two pops below run even when the check just above
// reported an error (which includes the empty-group-stack case) -- confirm
// that pop_group() is safe on an empty stack.
625 parser->pop_group(); // leave the stack 'clean'
626 parser->pop_attributes();
627}
628
// SAX2 start-element callback: process the start tag of an element according
// to the state the parser is currently in.
//
// p              the D4ParserSax2 object, passed as a void pointer
// l              local name of the element
// prefix         namespace prefix of the element, or null
// URI            namespace URI of the element, or null
// nb_namespaces  number of namespace declarations in 'namespaces'
// namespaces     namespace declarations; only read while accumulating the
//                content of an OtherXML attribute
// nb_attributes  number of attributes in 'attributes'
// attributes     the element's attributes (see transfer_xml_attrs())
//
// Unless the parser is in the parser_error state, an element whose namespace
// URI differs from the DAP4 namespace string pushes the not_dap4_element
// state before the switch below runs, so such an element is handled by the
// not_dap4_element case no matter which state the parser was in.
642void D4ParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
643 int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/,
644 const xmlChar **attributes)
645{
646 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
647 const char *localname = (const char *) l;
648
649 if (parser->debug()) cerr << "Start element " << localname << " prefix: "<< (prefix?(char *)prefix:"null") << " ns: "<< (URI?(char *)URI:"null")
650 << " (state: " << states[parser->get_state()] << ")" << endl;
651
652 if(parser->get_state() != parser_error){
653 string dap4_ns_name = DapXmlNamspaces::getDapNamespaceString(DAP_4_0);
654 if (parser->debug()) cerr << "dap4_ns_name: " << dap4_ns_name << endl;
655
656 string this_element_ns_name = (URI != 0) ? ((char *)URI) : "";
657 if (parser->debug()) cerr << "this_element_ns_name: " << this_element_ns_name << endl;
658
// compare() returns a non-zero value when the two strings differ, so this
// branch is taken for elements that are NOT in the DAP4 namespace.
659 if(this_element_ns_name.compare(dap4_ns_name)){
660 if (parser->debug()) cerr << "Start of non DAP4 element: " << localname << " detected." << endl;
661 parser->push_state(not_dap4_element);
662 // return;
663 }
664 }
665
666
667 switch (parser->get_state()) {
668 case parser_start:
// NOTE(review): processing continues after the error reported here; the
// Dataset attributes are still read and inside_dataset is still pushed.
669 if (is_not(localname, "Dataset"))
670 D4ParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.", localname);
671
672 parser->root_ns = URI ? (const char *) URI : "";
673 parser->transfer_xml_attrs(attributes, nb_attributes);
674
675 if (parser->check_required_attribute(string("name")))
676 parser->dmr()->set_name(parser->xml_attrs["name"].value);
677
678 if (parser->check_attribute("dapVersion"))
679 parser->dmr()->set_dap_version(parser->xml_attrs["dapVersion"].value);
680
681 if (parser->check_attribute("dmrVersion"))
682 parser->dmr()->set_dmr_version(parser->xml_attrs["dmrVersion"].value);
683
684 if (parser->check_attribute("base"))
685 parser->dmr()->set_request_xml_base(parser->xml_attrs["base"].value);
686
687 if (!parser->root_ns.empty())
688 parser->dmr()->set_namespace(parser->root_ns);
689
690 // Push the root Group on the stack
691 parser->push_group(parser->dmr()->root());
692
693 parser->push_state(inside_dataset);
694
695 break;
696
697 // Both inside dataset and inside group can have the same stuff.
698 // The difference is that the Dataset holds the root group, which
699 // must be present; other groups are optional
700 case inside_dataset:
701 case inside_group:
// Each process_*() call returns false when the element name is not the one
// it handles, so the chain below tries the handlers in turn.
702 if (parser->process_enum_def(localname, attributes, nb_attributes))
703 parser->push_state(inside_enum_def);
704 else if (parser->process_dimension_def(localname, attributes, nb_attributes))
705 parser->push_state(inside_dim_def);
706 else if (parser->process_group(localname, attributes, nb_attributes))
707 parser->push_state(inside_group);
708 else if (parser->process_variable(localname, attributes, nb_attributes))
709 // This will push either inside_simple_type or inside_structure
710 // onto the parser state stack.
711 break;
712 else if (parser->process_attribute(localname, attributes, nb_attributes))
713 // This will push either inside_attribute, inside_attribute_container
714 // or inside_otherxml_attribute onto the parser state stack
715 break;
716 else
717 D4ParserSax2::dmr_error(parser, "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.", localname);
718 break;
719
720 case inside_attribute_container:
721 if (parser->process_attribute(localname, attributes, nb_attributes))
722 break;
723 else
724 D4ParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
725 break;
726
727 case inside_attribute:
728 if (parser->process_attribute(localname, attributes, nb_attributes))
729 break;
730 else if (strcmp(localname, "Value") == 0)
731 parser->push_state(inside_attribute_value);
732 else
733 dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
734 break;
735
736 case inside_attribute_value:
737 // Attribute values are processed by the end element code.
738 break;
739
740 case inside_other_xml_attribute:
// Rebuild the start tag of the nested element as text and append it to
// other_xml; other_xml_depth counts how many nested elements are open.
741 parser->other_xml_depth++;
742
743 // Accumulate the elements here
744 parser->other_xml.append("<");
745 if (prefix) {
746 parser->other_xml.append((const char *) prefix);
747 parser->other_xml.append(":");
748 }
749 parser->other_xml.append(localname);
750
751 if (nb_namespaces != 0) {
752 parser->transfer_xml_ns(namespaces, nb_namespaces);
753
// NOTE(review): namespace_table is not cleared in this function, so every
// entry it holds is written out here, not only this element's declarations.
754 for (map<string, string>::iterator i = parser->namespace_table.begin();
755 i != parser->namespace_table.end(); ++i) {
756 parser->other_xml.append(" xmlns");
757 if (!i->first.empty()) {
758 parser->other_xml.append(":");
759 parser->other_xml.append(i->first);
760 }
761 parser->other_xml.append("=\"");
762 parser->other_xml.append(i->second);
763 parser->other_xml.append("\"");
764 }
765 }
766
767 if (nb_attributes != 0) {
768 parser->transfer_xml_attrs(attributes, nb_attributes);
// NOTE(review): attribute values are appended as-is, without escaping.
769 for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
770 parser->other_xml.append(" ");
771 if (!i->second.prefix.empty()) {
772 parser->other_xml.append(i->second.prefix);
773 parser->other_xml.append(":");
774 }
775 parser->other_xml.append(i->first);
776 parser->other_xml.append("=\"");
777 parser->other_xml.append(i->second.value);
778 parser->other_xml.append("\"");
779 }
780 }
781
782 parser->other_xml.append(">");
783 break;
784
785 case inside_enum_def:
786 // process an EnumConst element
787 if (parser->process_enum_const(localname, attributes, nb_attributes))
788 parser->push_state(inside_enum_const);
789 else
790 dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
791 break;
792
793 case inside_enum_const:
794 // No content; nothing to do
795 break;
796
797 case inside_dim_def:
798 // No content; nothing to do
799 break;
800#if 0
801 case inside_dimension:
802 // No content.
803 break;
804#endif
805 case inside_dim:
806 // No content.
807 break;
808
809 case inside_map:
810 // No content.
811 break;
812
813 case inside_simple_type:
814 if (parser->process_attribute(localname, attributes, nb_attributes))
815 break;
816 else if (parser->process_dimension(localname, attributes, nb_attributes))
817 parser->push_state(inside_dim);
818 else if (parser->process_map(localname, attributes, nb_attributes))
819 parser->push_state(inside_map);
820 else
821 dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
822 break;
823
824 case inside_constructor:
825 if (parser->process_variable(localname, attributes, nb_attributes))
826 // This will push either inside_simple_type or inside_structure
827 // onto the parser state stack.
828 break;
829 else if (parser->process_attribute(localname, attributes, nb_attributes))
830 break;
831 else if (parser->process_dimension(localname, attributes, nb_attributes))
832 parser->push_state(inside_dim);
833 else if (parser->process_map(localname, attributes, nb_attributes))
834 parser->push_state(inside_map);
835 else
836 D4ParserSax2::dmr_error(parser, "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
837 break;
838
839 case not_dap4_element:
840 if (parser->debug()) cerr << "Inside non DAP4 element. localname: " << localname << endl;
841 break;
842
843 case parser_unknown:
844 // FIXME?
845 // *** Never used? If so remove/error
846 parser->push_state(parser_unknown);
847 break;
848
849 case parser_error:
850 case parser_fatal_error:
851 break;
852
853 case parser_end:
854 // FIXME Error?
855 break;
856 }
857
858 if (parser->debug()) cerr << "Start element exit state: " << states[parser->get_state()] << endl;
859}
860
861void D4ParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
862{
863 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
864 const char *localname = (const char *) l;
865
866 if (parser->debug())
867 cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
868
869 switch (parser->get_state()) {
870 case parser_start:
871 dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
872 break;
873
874 case inside_dataset:
875 if (is_not(localname, "Dataset"))
876 D4ParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
877
878 parser->pop_state();
879 if (parser->get_state() != parser_start)
880 dmr_fatal_error(parser, "Unexpected state, expected start state.");
881 else {
882 parser->pop_state();
883 parser->push_state(parser_end);
884 }
885 break;
886
887 case inside_group: {
888 if (is_not(localname, "Group"))
889 D4ParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
890
891 if (!parser->empty_basetype() || parser->empty_group())
892 D4ParserSax2::dmr_error(parser,
893 "The document did not contain a valid root Group or contained unbalanced tags.");
894
895 parser->pop_group();
896 parser->pop_state();
897 break;
898 }
899
900 case inside_attribute_container:
901 if (is_not(localname, "Attribute"))
902 D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
903
904 parser->pop_state();
905 parser->pop_attributes();
906 break;
907
908 case inside_attribute:
909 if (is_not(localname, "Attribute"))
910 D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
911
912 parser->pop_state();
913 break;
914
915 case inside_attribute_value: {
916 if (is_not(localname, "Value"))
917 D4ParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
918
919 parser->pop_state();
920
921 // The old code added more values using the name and type as
922 // indexes to find the correct attribute. Use get() for that
923 // now. Or fix this code to keep a pointer to the to attribute...
924 D4Attributes *attrs = parser->top_attributes();
925 D4Attribute *attr = attrs->get(parser->dods_attr_name);
926 if (!attr) {
927 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
928 attrs->add_attribute_nocopy(attr);
929 }
930 attr->add_value(parser->char_data);
931
932 parser->char_data = ""; // Null this after use.
933 break;
934 }
935
936 case inside_other_xml_attribute: {
937 if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
938 parser->pop_state();
939
940 // The old code added more values using the name and type as
941 // indexes to find the correct attribute. Use get() for that
942 // now. Or fix this code to keep a pointer to the to attribute...
943 D4Attributes *attrs = parser->top_attributes();
944 D4Attribute *attr = attrs->get(parser->dods_attr_name);
945 if (!attr) {
946 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
947 attrs->add_attribute_nocopy(attr);
948 }
949 attr->add_value(parser->other_xml);
950
951 parser->other_xml = ""; // Null this after use.
952 }
953 else {
954 if (parser->other_xml_depth == 0) {
955 D4ParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
956 localname);
957 break;
958 }
959 parser->other_xml_depth--;
960
961 parser->other_xml.append("</");
962 if (prefix) {
963 parser->other_xml.append((const char *) prefix);
964 parser->other_xml.append(":");
965 }
966 parser->other_xml.append(localname);
967 parser->other_xml.append(">");
968 }
969 break;
970 }
971
972 case inside_enum_def:
973 if (is_not(localname, "Enumeration"))
974 D4ParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
975 if (!parser->top_group())
977 "Expected a Group to be the current item, while finishing up an Enumeration.");
978 else {
979 // copy the pointer; not a deep copy
980 parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
981 // Set the enum_def to null; next call to enum_def() will
982 // allocate a new object
983 parser->clear_enum_def();
984 parser->pop_state();
985 }
986 break;
987
988 case inside_enum_const:
989 if (is_not(localname, "EnumConst"))
990 D4ParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
991
992 parser->pop_state();
993 break;
994
995 case inside_dim_def: {
996 if (is_not(localname, "Dimension"))
997 D4ParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
998
999 if (!parser->top_group())
1000 D4ParserSax2::dmr_error(parser,
1001 "Expected a Group to be the current item, while finishing up an Dimension.");
1002
1003 // FIXME Use the Group on the top of the group stack
1004 // copy the pointer; not a deep copy
1005 parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
1006 //parser->dmr()->root()->dims()->add_dim_nocopy(parser->dim_def());
1007 // Set the dim_def to null; next call to dim_def() will
1008 // allocate a new object. Calling 'clear' is important because
1009 // the cleanup method will free dim_def if it's not null and
1010 // we just copied the pointer in the add_dim_nocopy() call
1011 // above.
1012 parser->clear_dim_def();
1013 parser->pop_state();
1014 break;
1015 }
1016
1017 case inside_simple_type:
1018 if (is_simple_type(get_type(localname))) {
1019 BaseType *btp = parser->top_basetype();
1020 parser->pop_basetype();
1021 parser->pop_attributes();
1022
1023 BaseType *parent = 0;
1024 if (!parser->empty_basetype())
1025 parent = parser->top_basetype();
1026 else if (!parser->empty_group())
1027 parent = parser->top_group();
1028 else {
1029 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1030 localname);
1031 delete btp;
1032 parser->pop_state();
1033 break;
1034 }
1035
1036 if (parent->type() == dods_array_c)
1037 static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
1038 else
1039 parent->add_var_nocopy(btp);
1040 }
1041 else
1042 D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1043
1044 parser->pop_state();
1045 break;
1046
1047 case inside_dim:
1048 if (is_not(localname, "Dim"))
1049 D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1050
1051 parser->pop_state();
1052 break;
1053
1054 case inside_map:
1055 if (is_not(localname, "Map"))
1056 D4ParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1057
1058 parser->pop_state();
1059 break;
1060
1061 case inside_constructor: {
1062 if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1063 D4ParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1064 return;
1065 }
1066
1067 BaseType *btp = parser->top_basetype();
1068 parser->pop_basetype();
1069 parser->pop_attributes();
1070
1071 BaseType *parent = 0;
1072 if (!parser->empty_basetype())
1073 parent = parser->top_basetype();
1074 else if (!parser->empty_group())
1075 parent = parser->top_group();
1076 else {
1077 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1078 localname);
1079 delete btp;
1080 parser->pop_state();
1081 break;
1082 }
1083
1084 // TODO Why doesn't this code mirror the simple_var case and test
1085 // for the parent being an array? jhrg 10/13/13
1086 parent->add_var_nocopy(btp);
1087 parser->pop_state();
1088 break;
1089 }
1090
1091 case not_dap4_element:
1092 if (parser->debug()) cerr << "End of non DAP4 element: " << localname << endl;
1093 parser->pop_state();
1094 break;
1095
1096 case parser_unknown:
1097 parser->pop_state();
1098 break;
1099
1100 case parser_error:
1101 case parser_fatal_error:
1102 break;
1103
1104 case parser_end:
1105 // FIXME Error?
1106 break;
1107 }
1108
1109 if (parser->debug()) cerr << "End element exit state: " << states[parser->get_state()] << endl;
1110}
1111
1115void D4ParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1116{
1117 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1118
1119 switch (parser->get_state()) {
1120 case inside_attribute_value:
1121 parser->char_data.append((const char *) (ch), len);
1122 DBG(cerr << "Characters: '" << parser->char_data << "'" << endl);
1123 break;
1124
1125 case inside_other_xml_attribute:
1126 parser->other_xml.append((const char *) (ch), len);
1127 DBG(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1128 break;
1129
1130 default:
1131 break;
1132 }
1133}
1134
1139void D4ParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1140{
1141 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1142
1143 switch (parser->get_state()) {
1144 case inside_other_xml_attribute:
1145 parser->other_xml.append((const char *) (ch), len);
1146 break;
1147
1148 default:
1149 break;
1150 }
1151}
1152
1158void D4ParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1159{
1160 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1161
1162 switch (parser->get_state()) {
1163 case inside_other_xml_attribute:
1164 parser->other_xml.append((const char *) (value), len);
1165 break;
1166
1167 case parser_unknown:
1168 break;
1169
1170 default:
1171 D4ParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1172
1173 break;
1174 }
1175}
1176
1181xmlEntityPtr D4ParserSax2::dmr_get_entity(void *, const xmlChar * name)
1182{
1183 return xmlGetPredefinedEntity(name);
1184}
1185
1196void D4ParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1197{
1198 va_list args;
1199 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1200
1201 parser->push_state(parser_fatal_error);
1202
1203 va_start(args, msg);
1204 char str[1024];
1205 vsnprintf(str, 1024, msg, args);
1206 va_end(args);
1207
1208 int line = xmlSAX2GetLineNumber(parser->d_context);
1209
1210 if (!parser->d_error_msg.empty()) parser->d_error_msg += "\n";
1211 parser->d_error_msg += "At line " + long_to_string(line) + ": " + string(str);
1212}
1213
1214void D4ParserSax2::dmr_error(void *p, const char *msg, ...)
1215{
1216 va_list args;
1217 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1218
1219 parser->push_state(parser_error);
1220
1221 va_start(args, msg);
1222 char str[1024];
1223 vsnprintf(str, 1024, msg, args);
1224 va_end(args);
1225
1226 int line = xmlSAX2GetLineNumber(parser->d_context);
1227
1228 if (!parser->d_error_msg.empty()) parser->d_error_msg += "\n";
1229 parser->d_error_msg += "At line " + long_to_string(line) + ": " + string(str);
1230}
1232
1236void D4ParserSax2::cleanup_parse()
1237{
1238 bool wellFormed = d_context->wellFormed;
1239 bool valid = d_context->valid;
1240
1241 // d_context->sax = NULL;
1242 xmlFreeParserCtxt(d_context);
1243
1244 delete d_enum_def;
1245 d_enum_def = 0;
1246
1247 delete d_dim_def;
1248 d_dim_def = 0;
1249
1250 // If there's an error, there may still be items on the stack at the
1251 // end of the parse.
1252 while (!btp_stack.empty()) {
1253 delete top_basetype();
1254 pop_basetype();
1255 }
1256
1257 if (!wellFormed)
1258 throw Error("The DMR was not well formed. " + d_error_msg);
1259 else if (!valid)
1260 throw Error("The DMR was not valid." + d_error_msg);
1261 else if (get_state() == parser_error)
1262 throw Error(d_error_msg);
1263 else if (get_state() == parser_fatal_error)
1264 throw InternalErr(d_error_msg);
1265}
1266
1281void D4ParserSax2::intern(istream &f, DMR *dest_dmr, bool debug)
1282{
1283 d_debug = debug;
1284
1285 // Code example from libxml2 docs re: read from a stream.
1286
1287 if (!f.good())
1288 throw Error("Input stream not open or read error");
1289 if (!dest_dmr)
1290 throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1291
1292 d_dmr = dest_dmr; // dump values here
1293#if 0
1294 int line_num = 1;
1295 string line;
1296
1297 // Get the <xml ... ?> line
1298 getline(f, line);
1299 if (line.length() == 0) throw Error("No input found while parsing the DMR.");
1300
1301 if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1302
1303 d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, line.c_str(), line.length(), "stream");
1304 d_context->validate = true;
1305 push_state(parser_start);
1306
1307 // Get the first line of stuff
1308 getline(f, line);
1309 ++line_num;
1310
1311 if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1312
1313 while (!f.eof() && (get_state() != parser_end)) {
1314 xmlParseChunk(d_context, line.c_str(), line.length(), 0);
1315
1316 // Get the next line
1317 getline(f, line);
1318 ++line_num;
1319
1320 if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1321 }
1322 // This call ends the parse.
1323 xmlParseChunk(d_context, line.c_str(), 0, 1/*terminate*/);
1324
1325#else
1326 int line_num = 1;
1327 string line;
1328
1329 // Get the XML prolog line (looks like: <?xml ... ?> )
1330 getline(f, line);
1331 if (line.length() == 0) throw Error("No input found while parsing the DMR.");
1332
1333 if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1334
1335 d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, line.c_str(), line.length(), "stream");
1336 d_context->validate = true;
1337 push_state(parser_start);
1338
1339 // Get the first chunk of the stuff
1340 long chunk_count = 0;
1341 long chunk_size = 0;
1342
1343 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1344 chunk_size=f.gcount();
1345 d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1346 if (debug) cerr << "chunk: (" << chunk_count++ << "): " << endl << d_parse_buffer << endl << endl;
1347
1348 while(!f.eof() && (get_state() != parser_end)){
1349
1350 xmlParseChunk(d_context, d_parse_buffer, chunk_size, 0);
1351
1352 // There is more to read. Get the next chunk
1353 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1354 chunk_size=f.gcount();
1355 d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1356 if (debug) cerr << "chunk: (" << chunk_count++ << "): " << endl << d_parse_buffer << endl << endl;
1357 }
1358
1359 // This call ends the parse.
1360 xmlParseChunk(d_context, d_parse_buffer, chunk_size, 1/*terminate*/);
1361#endif
1362
1363 // This checks that the state on the parser stack is parser_end and throws
1364 // an exception if it's not (i.e., the loop exited with gcount() == 0).
1365 cleanup_parse();
1366}
1367
1382void D4ParserSax2::intern(const string &document, DMR *dest_dmr, bool debug)
1383{
1384 intern(document.c_str(), document.length(), dest_dmr, debug);
1385}
1386
1397void D4ParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug)
1398{
1399 if (!(size > 0)) return;
1400
1401 d_debug = debug;
1402
1403 // Code example from libxml2 docs re: read from a stream.
1404
1405 if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1406 d_dmr = dest_dmr; // dump values in dest_dmr
1407
1408 push_state(parser_start);
1409 d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, buffer, size, "stream");
1410 d_context->validate = true;
1411
1412 // This call ends the parse.
1413 xmlParseChunk(d_context, buffer, 0, 1/*terminate*/);
1414
1415 // This checks that the state on the parser stack is parser_end and throws
1416 // an exception if it's not (i.e., the loop exited with gcount() == 0).
1417 cleanup_parse();
1418}
1419
1420} // namespace libdap
D4Dimension * find_dim(const string &path)
Find the dimension using a path. Using the DAP4 name syntax, lookup a dimension. The dimension must b...
Definition D4Group.cc:267
Array * find_map_source(const string &path)
Given a path to an Array that is also a Map, get that Array.
Definition D4Group.cc:299
virtual std::string FQN() const
Definition D4Group.cc:182
void intern(istream &f, DMR *dest_dmr, bool debug=false)
D4Group * root()
Definition DMR.cc:296
virtual D4BaseTypeFactory * factory()
Definition DMR.h:127
static std::string getDapNamespaceString(DAPVersion version)
A class for error processing.
Definition Error.h:94
A class for software fault reporting.
Definition InternalErr.h:65
static void dmr_start_document(void *parser)
static void dmr_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes)
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
static void dmr_end_document(void *parser)
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
static void dmr_fatal_error(void *parser, const char *msg,...)
top level DAP object to house generic methods
bool is_simple_type(Type t)
Returns true if the instance is a numeric, string or URL type variable.
Definition util.cc:778
string D4type_name(Type t)
Returns the type of the class instance as a string. Supports all DAP4 types and not the DAP2-only typ...
Definition util.cc:697
bool is_vector_type(Type t)
Returns true if the instance is a vector (i.e., array) type variable.
Definition util.cc:818
bool is_integer_type(Type t)
Definition util.cc:903
ObjectType get_type(const string &value)
Definition mime_util.cc:324