34 #include <libxml/parserInternals.h>
41 #include <D4Attributes.h>
44 #include <D4BaseTypeFactory.h>
46 #include <DapXmlNamespaces.h>
49 #include <BESInternalError.h>
51 #include <BESCatalog.h>
52 #include <BESCatalogUtils.h>
53 #include <BESCatalogList.h>
56 #include "DmrppParserSax2.h"
57 #include "DmrppCommon.h"
// Size constants: 512 * 1024 and 1024 * 1024.
// The definitions carry no trailing semicolon so that the macros expand to a
// plain integer literal and can be used inside expressions, array bounds and
// function-call arguments (a ';' in the replacement text would be pasted into
// the middle of any such use).
#define FIVE_12K 524288
#define ONE_MB 1048576
#define MAX_INPUT_LINE_LENGTH ONE_MB
// Debug-context label handed to BESDEBUG() by this parser.
64 static const string module =
"dmrpp:2";
// Namespace URI of DMR++ elements; dmr_start_element() compares every
// element's namespace name against this string.
65 static const string dmrpp_namespace =
"http://xml.opendap.org/dap/dmrpp/1.0.0#";
// Printable names of the parser states. The debug output in this file indexes
// the table with the value returned by get_state().
// NOTE(review): the entries presumably have to stay in the same order as the
// parser-state enum declared elsewhere — confirm before adding or moving one.
72 static const char *states[] = {
"parser_start",
80 "inside_attribute_container",
"inside_attribute",
"inside_attribute_value",
"inside_other_xml_attribute",
82 "inside_enum_def",
"inside_enum_const",
90 "inside_dim",
"inside_map",
94 "not_dap4_element",
"inside_dmrpp_object",
"inside_dmrpp_chunkDimensionSizes_element",
96 "parser_unknown",
"parser_error",
"parser_fatal_error",
/**
 * Tell whether an element name differs from an expected tag name.
 *
 * @param name The name found in the document (NUL-terminated).
 * @param tag The name that was expected (NUL-terminated).
 * @return true when the two strings are not identical, false when they match.
 */
static bool is_not(const char *name, const char *tag)
{
    const bool same_text = (strcmp(name, tag) == 0);
    return !same_text;
}
// Accessor for the enumeration definition currently being built: a new
// D4EnumDef is allocated the first time it is needed (when d_enum_def is unset).
114 DmrppParserSax2::enum_def()
116 if (!d_enum_def) d_enum_def =
new D4EnumDef;
// Accessor for the dimension definition currently being built: a new
// D4Dimension is allocated the first time it is needed (when d_dim_def is unset).
128 DmrppParserSax2::dim_def()
130 if (!d_dim_def) d_dim_def =
new D4Dimension;
142 string DmrppParserSax2::get_attribute_val(
const string &name,
const xmlChar **attributes,
int num_attributes)
144 unsigned int index = 0;
145 for (
int i = 0; i < num_attributes; ++i, index += 5) {
146 if (strncmp(name.c_str(), (
const char *)attributes[index], name.length()) == 0) {
147 return string((
const char *)attributes[index+3], (
const char *)attributes[index+4]);
// Refill xml_attrs from a libxml2 SAX2 attribute array.
// Entries left over from a previous call are cleared first. The array is
// walked five slots per attribute: slot 0 (the attribute name) becomes the key
// of a map<string, XMLAttribute>::value_type and the slots that follow it are
// passed to the XMLAttribute constructor.
159 void DmrppParserSax2::transfer_xml_attrs(
const xmlChar **attributes,
int nb_attributes)
161 if (!xml_attrs.empty()) xml_attrs.clear();
165 unsigned int index = 0;
166 for (
int i = 0; i < nb_attributes; ++i, index += 5) {
168 map<string, XMLAttribute>::value_type(
string((
const char *) attributes[index]),
169 XMLAttribute(attributes + index + 1)));
172 "XML Attribute '" << (
const char *)attributes[index] <<
"': " << xml_attrs[(
const char *)attributes[index]].value << endl);
// Copy namespace declarations into namespace_table.
// The array holds two entries per namespace: entry 2*i is used as the key
// (replaced by the empty string when it is a null pointer) and entry 2*i + 1
// as the mapped value.
183 void DmrppParserSax2::transfer_xml_ns(
const xmlChar **namespaces,
int nb_namespaces)
186 for (
int i = 0; i < nb_namespaces; ++i) {
187 namespace_table.insert(
188 map<string, string>::value_type(namespaces[i * 2] != 0 ? (
const char *) namespaces[i * 2] :
"",
189 (
const char *) namespaces[i * 2 + 1]));
// Check that a required attribute is present in xml_attrs (the map filled by
// transfer_xml_attrs()). When the name is not found, a parse error naming the
// attribute is recorded through dmr_error().
200 bool DmrppParserSax2::check_required_attribute(
const string & attr)
202 if (xml_attrs.find(attr) == xml_attrs.end()) {
203 dmr_error(
this,
"Required attribute '%s' not found.", attr.c_str());
220 bool DmrppParserSax2::check_required_attribute(
const string &name,
const xmlChar **attributes,
int num_attributes)
222 unsigned int index = 0;
223 for (
int i = 0; i < num_attributes; ++i, index += 5) {
224 if (strncmp(name.c_str(), (
const char *)attributes[index], name.length()) == 0) {
229 dmr_error(
this,
"Required attribute '%s' not found.", name.c_str());
240 bool DmrppParserSax2::check_attribute(
const string & attr)
242 return (xml_attrs.find(attr) != xml_attrs.end());
256 bool DmrppParserSax2::check_attribute(
const string &name,
const xmlChar **attributes,
int num_attributes)
258 unsigned int index = 0;
259 for (
int i = 0; i < num_attributes; ++i, index += 5) {
260 if (strncmp(name.c_str(), (
const char *)attributes[index], name.length()) == 0) {
// Handle the start tag of a Dimension definition.
// Returns false straight away when the element is not named "Dimension".
// Otherwise the 'name' and 'size' attributes are both required; when either is
// missing a parse error is recorded. The two values are copied into the
// dimension object returned by dim_def(). The message of a caught error 'e' is
// forwarded to dmr_error().
267 bool DmrppParserSax2::process_dimension_def(
const char *name,
const xmlChar **attrs,
int nb_attributes)
269 if (is_not(name,
"Dimension"))
return false;
272 transfer_xml_attrs(attrs, nb_attributes);
275 if (!(check_required_attribute(
"name", attrs, nb_attributes) && check_required_attribute(
"size", attrs, nb_attributes))) {
276 dmr_error(
this,
"The required attribute 'name' or 'size' was missing from a Dimension element.");
281 dim_def()->set_name(get_attribute_val(
"name", attrs, nb_attributes));
283 dim_def()->set_size(get_attribute_val(
"size", attrs, nb_attributes));
286 dmr_error(
this, e.get_error_message().c_str());
// Handle the start tag of a Dim element inside a variable.
// Returns false straight away when the element is not named "Dim".
// Exactly one of the 'size' and 'name' attributes must be given; a parse error
// is recorded when both or neither are present.
310 bool DmrppParserSax2::process_dimension(
const char *name,
const xmlChar **attrs,
int nb_attributes)
312 if (is_not(name,
"Dim"))
return false;
315 transfer_xml_attrs(attrs, nb_attributes);
318 if (check_attribute(
"size", attrs, nb_attributes) && check_attribute(
"name", attrs, nb_attributes)) {
319 dmr_error(
this,
"Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
322 if (!(check_attribute(
"size", attrs, nb_attributes) || check_attribute(
"name", attrs, nb_attributes))) {
323 dmr_error(
this,
"Either 'size' or 'name' must be used in a Dim element.");
// The variable on top of the stack is not an array yet: build an Array with
// the same name from the factory, make the variable its element (no copy) and
// hand the variable's attributes over to the new Array.
327 if (!top_basetype()->is_vector_type()) {
329 BaseType *b = top_basetype();
332 Array *a =
static_cast<Array*
>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
333 a->set_is_dap4(
true);
334 a->add_var_nocopy(b);
335 a->set_attributes_nocopy(b->attributes());
// The attributes now belong to the Array; reset the pointer held by 'b'.
339 b->set_attributes_nocopy(0);
344 assert(top_basetype()->is_vector_type());
346 Array *a =
static_cast<Array*
>(top_basetype());
// An anonymous dimension: the 'size' text is converted with stoi().
347 if (check_attribute(
"size", attrs, nb_attributes)) {
348 a->append_dim(stoi(get_attribute_val(
"size", attrs, nb_attributes)));
// A named (shared) dimension: looked up through the root group or the
// current group; an Error is thrown for a dimension that was not found.
351 else if (check_attribute(
"name", attrs, nb_attributes)) {
352 string name = get_attribute_val(
"name", attrs, nb_attributes);
354 D4Dimension *dim = 0;
356 dim = dmr()->root()->find_dim(name);
359 dim = top_group()->find_dim(name);
362 throw Error(
"The dimension '" + name +
"' was not found while parsing the variable '" + a->name() +
"'.");
// Handle the start tag of a Map element inside a variable.
// Returns false straight away when the element is not named "Map".
// The 'name' attribute is mandatory; a parse error is recorded without it.
370 bool DmrppParserSax2::process_map(
const char *name,
const xmlChar **attrs,
int nb_attributes)
372 if (is_not(name,
"Map"))
return false;
375 transfer_xml_attrs(attrs, nb_attributes);
378 if (!check_attribute(
"name", attrs, nb_attributes)) {
379 dmr_error(
this,
"The 'name' attribute must be used in a Map element.");
// The variable on top of the stack is not an array yet: build an Array with
// the same name from the factory, make the variable its element (no copy) and
// hand the variable's attributes over to the new Array.
383 if (!top_basetype()->is_vector_type()) {
385 BaseType *b = top_basetype();
388 Array *a =
static_cast<Array*
>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
389 a->set_is_dap4(
true);
390 a->add_var_nocopy(b);
391 a->set_attributes_nocopy(b->attributes());
// The attributes now belong to the Array; reset the pointer held by 'b'.
395 b->set_attributes_nocopy(0);
400 assert(top_basetype()->is_vector_type());
402 Array *a =
static_cast<Array*
>(top_basetype());
// A name that does not start with '/' is relative: prefix it with the fully
// qualified name of the current group.
// NOTE(review): .at(0) throws std::out_of_range when the 'name' attribute is
// present but empty — confirm whether that case needs a parse error instead.
404 string map_name = get_attribute_val(
"name", attrs, nb_attributes);
405 if (get_attribute_val(
"name", attrs, nb_attributes).at(0) !=
'/') map_name = top_group()->FQN() + map_name;
407 Array *map_source = 0;
409 if (map_name[0] ==
'/')
410 map_source = dmr()->root()->find_map_source(map_name);
413 map_source = top_group()->find_map_source(map_name);
// A missing map source is only an error when d_strict is set; otherwise the
// D4Map is added with whatever map_source holds.
421 if (!map_source && d_strict)
422 throw Error(
"The Map '" + map_name +
"' was not found while parsing the variable '" + a->name() +
"'.");
424 a->maps()->add_map(
new D4Map(map_name, map_source));
// Handle the start tag of a Group element.
// Returns false straight away when the element is not named "Group".
// The 'name' attribute is required. A group object with that name is requested
// from the factory, marked as DAP4, linked to the group on top of the group
// stack (as its parent, and as a child added without copying) and its
// attribute table is pushed on the attribute stack.
429 bool DmrppParserSax2::process_group(
const char *name,
const xmlChar **attrs,
int nb_attributes)
431 if (is_not(name,
"Group"))
return false;
434 transfer_xml_attrs(attrs, nb_attributes);
437 if (!check_required_attribute(
"name", attrs, nb_attributes)) {
438 dmr_error(
this,
"The required attribute 'name' was missing from a Group element.");
442 BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, get_attribute_val(
"name", attrs, nb_attributes));
444 dmr_fatal_error(
this,
"Could not instantiate the Group '%s'.", get_attribute_val(
"name", attrs, nb_attributes).c_str());
448 D4Group *grp =
static_cast<D4Group*
>(btp);
452 grp->set_is_dap4(
true);
455 D4Group *parent = top_group();
457 dmr_fatal_error(
this,
"No Group on the Group stack.");
461 grp->set_parent(parent);
462 parent->add_group_nocopy(grp);
465 push_attributes(grp->attributes());
// Handle the start tag of an Attribute element.
// Returns false straight away when the element is not named "Attribute".
// Both 'name' and 'type' are required. The 'type' value selects the state that
// is pushed:
//   "Container" -> inside_attribute_container; a new container attribute is
//                  added to the table on top of the attribute stack and its
//                  own table is pushed;
//   "OtherXML"  -> inside_other_xml_attribute;
//   otherwise   -> inside_attribute.
// In the last two cases the name and type are remembered in dods_attr_name and
// dods_attr_type.
475 inline bool DmrppParserSax2::process_attribute(
const char *name,
const xmlChar **attrs,
int nb_attributes)
477 if (is_not(name,
"Attribute"))
return false;
481 transfer_xml_attrs(attrs, nb_attributes);
485 if (!(check_required_attribute(
string(
"name"), attrs, nb_attributes) && check_required_attribute(
string(
"type"), attrs, nb_attributes))) {
486 dmr_error(
this,
"The required attribute 'name' or 'type' was missing from an Attribute element.");
490 if (get_attribute_val(
"type", attrs, nb_attributes) ==
"Container") {
491 push_state(inside_attribute_container);
493 BESDEBUG(module,
"Pushing attribute container " << get_attribute_val(
"name", attrs, nb_attributes) << endl);
494 D4Attribute *child =
new D4Attribute(get_attribute_val(
"name", attrs, nb_attributes), attr_container_c);
496 D4Attributes *tos = top_attributes();
500 dmr_fatal_error(
this,
"Expected an Attribute container on the top of the attribute stack.");
504 tos->add_attribute_nocopy(child);
505 push_attributes(child->attributes());
507 else if (get_attribute_val(
"type", attrs, nb_attributes) ==
"OtherXML") {
508 push_state(inside_other_xml_attribute);
510 dods_attr_name = get_attribute_val(
"name", attrs, nb_attributes);
511 dods_attr_type = get_attribute_val(
"type", attrs, nb_attributes);
514 push_state(inside_attribute);
516 dods_attr_name = get_attribute_val(
"name", attrs, nb_attributes);
517 dods_attr_type = get_attribute_val(
"type", attrs, nb_attributes);
// Handle the start tag of an Enumeration definition.
// Returns false straight away when the element is not named "Enumeration".
// 'name' and 'basetype' are required, and the base type must be an integer
// type; otherwise a parse error is recorded. The name and type are stored in
// the definition returned by enum_def().
528 inline bool DmrppParserSax2::process_enum_def(
const char *name,
const xmlChar **attrs,
int nb_attributes)
530 if (is_not(name,
"Enumeration"))
return false;
533 transfer_xml_attrs(attrs, nb_attributes);
536 if (!(check_required_attribute(
"name", attrs, nb_attributes) && check_required_attribute(
"basetype", attrs, nb_attributes))) {
537 dmr_error(
this,
"The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
541 Type t = get_type(get_attribute_val(
"basetype", attrs, nb_attributes).c_str());
542 if (!is_integer_type(t)) {
543 dmr_error(
this,
"The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
544 get_attribute_val(
"name", attrs, nb_attributes).c_str(), get_attribute_val(
"basetype", attrs, nb_attributes).c_str());
// A name that does not start with '/' is relative: prefix it with the fully
// qualified name of the current group. The first character is read from the
// xml_attrs copy of the attribute.
549 string enum_def_path = get_attribute_val(
"name", attrs, nb_attributes);
552 if (xml_attrs[
"name"].value[0] !=
'/')
553 enum_def_path = top_group()->FQN() + enum_def_path;
555 enum_def()->set_name(enum_def_path);
556 enum_def()->set_type(t);
// Handle the start tag of an EnumConst element.
// Returns false straight away when the element is not named "EnumConst".
// 'name' and 'value' are required. The value text is read through an
// istringstream; a parse error is recorded when the extraction fails or when
// the current enumeration reports the value as not valid for its type.
// Otherwise the name/value pair is added to the current enumeration.
561 inline bool DmrppParserSax2::process_enum_const(
const char *name,
const xmlChar **attrs,
int nb_attributes)
563 if (is_not(name,
"EnumConst"))
return false;
567 transfer_xml_attrs(attrs, nb_attributes);
570 if (!(check_required_attribute(
"name", attrs, nb_attributes) && check_required_attribute(
"value", attrs, nb_attributes))) {
571 dmr_error(
this,
"The required attribute 'name' or 'value' was missing from an EnumConst element.");
575 istringstream iss(get_attribute_val(
"value", attrs, nb_attributes));
577 iss >> skipws >> value;
578 if (iss.fail() || iss.bad()) {
579 dmr_error(
this,
"Expected an integer value for an Enumeration constant, got '%s' instead.",
580 get_attribute_val(
"value", attrs, nb_attributes).c_str());
582 else if (!enum_def()->is_valid_enum_value(value)) {
583 dmr_error(
this,
"In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
584 get_attribute_val(
"value", attrs, nb_attributes).c_str(), D4type_name(d_enum_def->type()).c_str());
588 enum_def()->add_value(get_attribute_val(
"name", attrs, nb_attributes), value);
// Handle the start tag of a variable element.
// The element name is mapped to a Type with get_type(). Simple types are
// passed to process_variable_helper() with the state inside_simple_type;
// Structure and Sequence are passed on with the state inside_constructor.
599 inline bool DmrppParserSax2::process_variable(
const char *name,
const xmlChar **attrs,
int nb_attributes)
601 Type t = get_type(name);
602 if (is_simple_type(t)) {
603 process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
608 case dods_structure_c:
609 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
612 case dods_sequence_c:
613 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
// Create the variable object for a start tag.
// When the required 'name' attribute is present, a variable of type 't' with
// that name is requested from the factory. For an Enum variable the 'enum'
// attribute is required too: a path starting with '/' is looked up from the
// root group, any other path from the current group, and a missing definition
// is a fatal parse error. The variable is marked as DAP4 and its attribute
// table is pushed on the attribute stack.
629 void DmrppParserSax2::process_variable_helper(
Type t, ParseState s,
const xmlChar **attrs,
int nb_attributes)
632 transfer_xml_attrs(attrs, nb_attributes);
635 if (check_required_attribute(
"name", attrs, nb_attributes)) {
636 BaseType *btp = dmr()->factory()->NewVariable(t, get_attribute_val(
"name", attrs, nb_attributes));
638 dmr_fatal_error(
this,
"Could not instantiate the variable '%s'.", xml_attrs[
"name"].value.c_str());
642 if ((t == dods_enum_c) && check_required_attribute(
"enum", attrs, nb_attributes)) {
643 D4EnumDef *enum_def = 0;
644 string enum_path = get_attribute_val(
"enum", attrs, nb_attributes);
645 if (enum_path[0] ==
'/')
646 enum_def = dmr()->root()->find_enum_def(enum_path);
648 enum_def = top_group()->find_enum_def(enum_path);
650 if (!enum_def) dmr_fatal_error(
this,
"Could not find the Enumeration definition '%s'.", enum_path.c_str());
652 static_cast<D4Enum*
>(btp)->set_enumeration(enum_def);
655 btp->set_is_dap4(
true);
658 push_attributes(btp->attributes());
// SAX callback for the start of the document.
// Clears the accumulated error message and character data, and pushes the
// root group's attribute table on the attribute stack. With debugging on, the
// starting state is written to cerr.
674 void DmrppParserSax2::dmr_start_document(
void * p)
677 parser->error_msg =
"";
678 parser->char_data =
"";
685 parser->push_attributes(parser->dmr()->root()->attributes());
687 if (parser->debug()) cerr <<
"Parser start state: " << states[parser->get_state()] << endl;
// SAX callback for the end of the document.
// A final state other than parser_end is reported as unbalanced tags. Nothing
// more is done once the parser is in an error state. Otherwise the variable
// stack must be empty and the group stack must not be, or an error is
// recorded; finally the attribute stack is popped.
692 void DmrppParserSax2::dmr_end_document(
void * p)
696 if (parser->debug()) cerr <<
"Parser end state: " << states[parser->get_state()] << endl;
698 if (parser->get_state() != parser_end)
699 DmrppParserSax2::dmr_error(parser,
"The document contained unbalanced tags.");
703 if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
return;
705 if (!parser->empty_basetype() || parser->empty_group())
706 DmrppParserSax2::dmr_error(parser,
707 "The document did not contain a valid root Group or contained unbalanced tags.");
709 if (parser->debug()) parser->top_group()->dump(cerr);
712 parser->pop_attributes();
// SAX2 callback for an element start tag.
// The element's namespace decides the first step: elements in the DMR++
// namespace push inside_dmrpp_chunkDimensionSizes_element (for
// "chunkDimensionSizes") or inside_dmrpp_object; elements in any namespace
// other than the DAP4 one push not_dap4_element. After that the current
// parser state selects how the element is handled.
715 void DmrppParserSax2::dmr_start_element(
void *p,
const xmlChar *l,
const xmlChar *prefix,
const xmlChar *URI,
716 int nb_namespaces,
const xmlChar **namespaces,
int nb_attributes,
int ,
const xmlChar **attributes)
719 const char *localname =
reinterpret_cast<const char *
>(l);
// An element without a namespace URI is given the placeholder name "null".
721 string this_element_ns_name(URI ? (
char *) URI :
"null");
723 if (parser->get_state() != parser_error) {
724 string dap4_ns_name = DapXmlNamspaces::getDapNamespaceString(DAP_4_0);
725 if (parser->debug()) cerr <<
"dap4_ns_name: " << dap4_ns_name << endl;
727 if (this_element_ns_name == dmrpp_namespace) {
728 if (strcmp(localname,
"chunkDimensionSizes") == 0) {
729 if (parser->debug()) cerr <<
"Found dmrpp:chunkDimensionSizes element. Pushing state." << endl;
730 parser->push_state(inside_dmrpp_chunkDimensionSizes_element);
734 cerr <<
"Start of element in dmrpp namespace: " << localname <<
" detected." << endl;
735 parser->push_state(inside_dmrpp_object);
740 else if (this_element_ns_name != dap4_ns_name) {
741 if (parser->debug()) cerr <<
"Start of non DAP4 element: " << localname <<
" detected." << endl;
742 parser->push_state(not_dap4_element);
747 cerr <<
"Start element " << localname <<
" prefix: " << (prefix ? (
char *) prefix :
"null") <<
" ns: "
748 << this_element_ns_name <<
" (state: " << states[parser->get_state()] <<
")" << endl;
750 switch (parser->get_state()) {
// The document has to open with a Dataset element. Its attributes (name,
// dapVersion, dmrVersion, base, href) are copied to the DMR / parser, the root
// group is pushed and the state becomes inside_dataset.
752 if (is_not(localname,
"Dataset"))
753 DmrppParserSax2::dmr_error(parser,
"Expected DMR to start with a Dataset element; found '%s' instead.",
756 parser->root_ns = URI ? (
const char *) URI :
"";
759 parser->transfer_xml_attrs(attributes, nb_attributes);
762 if (parser->check_required_attribute(
string(
"name"), attributes, nb_attributes)) parser->dmr()->set_name(parser->get_attribute_val(
"name", attributes, nb_attributes));
764 if (parser->check_attribute(
"dapVersion", attributes, nb_attributes))
765 parser->dmr()->set_dap_version(parser->get_attribute_val(
"dapVersion", attributes, nb_attributes));
767 if (parser->check_attribute(
"dmrVersion", attributes, nb_attributes))
768 parser->dmr()->set_dmr_version(parser->get_attribute_val(
"dmrVersion", attributes, nb_attributes));
770 if (parser->check_attribute(
"base", attributes, nb_attributes)) {
771 parser->dmr()->set_request_xml_base(parser->get_attribute_val(
"base", attributes, nb_attributes));
773 if (parser->debug()) cerr <<
"Dataset xml:base is set to '" << parser->dmr()->request_xml_base() <<
"'" << endl;
775 if (parser->check_attribute(
"href", attributes, nb_attributes)) {
776 parser->dmrpp_dataset_href = parser->get_attribute_val(
"href", attributes, nb_attributes);
778 if (parser->debug()) cerr <<
"Dataset dmrpp:href is set to '" << parser->dmrpp_dataset_href <<
"'" << endl;
780 if (!parser->root_ns.empty()) parser->dmr()->set_namespace(parser->root_ns);
783 parser->push_group(parser->dmr()->root());
785 parser->push_state(inside_dataset);
// Try each kind of child element in turn; the first process_*() call that
// accepts the element decides the new state.
794 if (parser->process_enum_def(localname, attributes, nb_attributes))
795 parser->push_state(inside_enum_def);
796 else if (parser->process_dimension_def(localname, attributes, nb_attributes))
797 parser->push_state(inside_dim_def);
798 else if (parser->process_group(localname, attributes, nb_attributes))
799 parser->push_state(inside_group);
800 else if (parser->process_variable(localname, attributes, nb_attributes))
804 else if (parser->process_attribute(localname, attributes, nb_attributes))
809 DmrppParserSax2::dmr_error(parser,
810 "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.",
814 case inside_attribute_container:
815 if (parser->process_attribute(localname, attributes, nb_attributes))
818 DmrppParserSax2::dmr_error(parser,
"Expected an Attribute element; found '%s' instead.", localname);
821 case inside_attribute:
822 if (parser->process_attribute(localname, attributes, nb_attributes))
824 else if (strcmp(localname,
"Value") == 0)
825 parser->push_state(inside_attribute_value);
827 dmr_error(parser,
"Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
830 case inside_attribute_value:
// Inside an OtherXML attribute the start tag is not interpreted; it is
// re-serialized (prefix, name, namespace declarations, attributes) and
// appended to other_xml, and the nesting depth is counted.
834 case inside_other_xml_attribute:
835 parser->other_xml_depth++;
838 parser->other_xml.append(
"<");
840 parser->other_xml.append((
const char *) prefix);
841 parser->other_xml.append(
":");
843 parser->other_xml.append(localname);
845 if (nb_namespaces != 0) {
846 parser->transfer_xml_ns(namespaces, nb_namespaces);
848 for (map<string, string>::iterator i = parser->namespace_table.begin(); i != parser->namespace_table.end();
850 parser->other_xml.append(
" xmlns");
851 if (!i->first.empty()) {
852 parser->other_xml.append(
":");
853 parser->other_xml.append(i->first);
855 parser->other_xml.append(
"=\"");
856 parser->other_xml.append(i->second);
857 parser->other_xml.append(
"\"");
861 if (nb_attributes != 0) {
863 parser->transfer_xml_attrs(attributes, nb_attributes);
865 for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
866 parser->other_xml.append(
" ");
867 if (!i->second.prefix.empty()) {
868 parser->other_xml.append(i->second.prefix);
869 parser->other_xml.append(
":");
871 parser->other_xml.append(i->first);
872 parser->other_xml.append(
"=\"");
873 parser->other_xml.append(i->second.value);
874 parser->other_xml.append(
"\"");
878 parser->other_xml.append(
">");
881 case inside_enum_def:
883 if (parser->process_enum_const(localname, attributes, nb_attributes))
884 parser->push_state(inside_enum_const);
886 dmr_error(parser,
"Expected an 'EnumConst' element; found '%s' instead.", localname);
889 case inside_enum_const:
905 case inside_simple_type:
906 if (parser->process_attribute(localname, attributes, nb_attributes))
908 else if (parser->process_dimension(localname, attributes, nb_attributes))
909 parser->push_state(inside_dim);
910 else if (parser->process_map(localname, attributes, nb_attributes))
911 parser->push_state(inside_map);
913 dmr_error(parser,
"Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
916 case inside_constructor:
917 if (parser->process_variable(localname, attributes, nb_attributes))
921 else if (parser->process_attribute(localname, attributes, nb_attributes))
923 else if (parser->process_dimension(localname, attributes, nb_attributes))
924 parser->push_state(inside_dim);
925 else if (parser->process_map(localname, attributes, nb_attributes))
926 parser->push_state(inside_map);
928 DmrppParserSax2::dmr_error(parser,
929 "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
932 case not_dap4_element:
934 cerr <<
"SKIPPING unexpected element. localname: " << localname <<
"namespace: " << this_element_ns_name
// A DMR++ element: the chunk information it carries is stored in the variable
// on top of the variable stack, which must be a DmrppCommon.
938 case inside_dmrpp_object: {
939 if (parser->debug()) cerr <<
"Inside dmrpp namespaced element. localname: " << localname << endl;
940 assert(this_element_ns_name == dmrpp_namespace);
943 parser->transfer_xml_attrs(attributes, nb_attributes);
946 BaseType *bt = parser->top_basetype();
// NOTE(review): the message below is raised when 'bt' is null, so it probably
// should read "Could not locate ..." — confirm and correct the wording.
947 if (!bt)
throw BESInternalError(
"Could locate parent BaseType during parse operation.", __FILE__, __LINE__);
949 DmrppCommon *dc =
dynamic_cast<DmrppCommon*
>(bt);
951 throw BESInternalError(
"Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
// "chunks": an optional compressionType attribute is handed to the variable.
954 if (strcmp(localname,
"chunks") == 0) {
955 if (parser->debug()) cerr <<
"DMR++ chunks element. localname: " << localname << endl;
957 if (parser->check_attribute(
"compressionType", attributes, nb_attributes)) {
958 string compression_type_string(parser->get_attribute_val(
"compressionType", attributes, nb_attributes));
959 dc->ingest_compression_type(compression_type_string);
962 cerr <<
"Processed attribute 'compressionType=\"" << compression_type_string <<
"\"'" << endl;
966 cerr <<
"There was no 'compressionType' attribute associated with the variable '" << bt->type_name()
967 <<
" " << bt->name() <<
"'" << endl;
// "chunk": collect the data URL (the element's own href, or else the Dataset
// level dmrpp:href), the offset, the byte count and the optional
// chunkPositionInArray text, then register the chunk with the variable.
971 else if (strcmp(localname,
"chunk") == 0) {
972 string data_url =
"unknown_data_location";
973 if (parser->check_attribute(
"href", attributes, nb_attributes)) {
975 istringstream data_url_ss(parser->xml_attrs[
"href"].value);
976 data_url = data_url_ss.str();
978 cerr <<
"Processing 'href' value into data_url. href: " << data_url_ss.str() << endl;
981 data_url = parser->get_attribute_val(
"href", attributes, nb_attributes);
983 cerr <<
"Processing 'href' value into data_url. href: " << data_url << endl;
986 if (parser->debug()) cerr <<
"No attribute 'href' located. Trying Dataset/@dmrpp:href..." << endl;
989 data_url = parser->dmrpp_dataset_href;
991 cerr <<
"Processing dmrpp:href into data_url. dmrpp:href='" << data_url <<
"'" << endl;
// A data_url without an http://, https:// or file:// scheme is turned into a
// file:// URL.
996 std::string http(
"http://");
997 std::string https(
"https://");
998 std::string file(
"file://");
999 if (data_url.compare(0, http.size(), http) && data_url.compare(0, https.size(), https)
1000 && data_url.compare(0, file.size(), file)) {
1003 if (data_url.find(
"http://") != 0 && data_url.find(
"https://") != 0 && data_url.find(
"file://") != 0) {
1004 if (parser->debug()) cerr <<
"data_url does NOT start with 'http://', 'https://' or 'file://'. "
1005 "Retrieving default catalog root directory" << endl;
1010 if (parser->debug()) cerr <<
"Not able to find the default catalog." << endl;
1016 if (parser->debug())
1017 cerr <<
"Found default catalog root_dir: '" << utils->
get_root_dir() <<
"'" << endl;
1020 data_url =
"file://" + data_url;
1024 if (parser->debug()) cerr <<
"Processed data_url: '" << data_url <<
"'" << endl;
1026 unsigned long long offset = 0;
1027 unsigned long long size = 0;
1028 string chunk_position_in_array(
"");
1030 if (parser->check_required_attribute(
"offset", attributes, nb_attributes)) {
1031 istringstream offset_ss(parser->get_attribute_val(
"offset", attributes, nb_attributes));
1032 offset_ss >> offset;
1033 if (parser->debug()) cerr <<
"Processed attribute 'offset=\"" << offset <<
"\"'" << endl;
// NOTE(review): this branch handles a dmrpp "chunk" element, yet the message
// speaks of an hdf:byteStream element — confirm the intended wording.
1036 dmr_error(parser,
"The hdf:byteStream element is missing the required attribute 'offset'.");
1039 if (parser->check_required_attribute(
"nBytes", attributes, nb_attributes)) {
1040 istringstream size_ss(parser->get_attribute_val(
"nBytes", attributes, nb_attributes));
1042 if (parser->debug()) cerr <<
"Processed attribute 'nBytes=\"" << size <<
"\"'" << endl;
// NOTE(review): the attribute that is checked is 'nBytes', but the message
// names 'size' — the message text looks wrong; confirm and correct.
1045 dmr_error(parser,
"The hdf:byteStream element is missing the required attribute 'size'.");
1048 if (parser->check_attribute(
"chunkPositionInArray", attributes, nb_attributes)) {
1049 istringstream chunk_position_ss(parser->get_attribute_val(
"chunkPositionInArray", attributes, nb_attributes));
1050 chunk_position_in_array = chunk_position_ss.str();
1051 if (parser->debug())
1052 cerr <<
"Found attribute 'chunkPositionInArray' value: " << chunk_position_ss.str() << endl;
1055 if (parser->debug()) cerr <<
"No attribute 'chunkPositionInArray' located" << endl;
1058 dc->add_chunk(data_url, size, offset, chunk_position_in_array);
1063 case inside_dmrpp_chunkDimensionSizes_element:
1067 case parser_unknown:
1069 case parser_fatal_error:
1077 if (parser->debug()) cerr <<
"Start element exit state: " << states[parser->get_state()] << endl;
// SAX2 callback for an element end tag.
// The current parser state selects the handling: the end tag is checked
// against the name expected for that state (a mismatch is recorded as a parse
// error), finished objects are attached to their parent, and the state that
// was pushed for the element is popped.
1080 void DmrppParserSax2::dmr_end_element(
void *p,
const xmlChar *l,
const xmlChar *prefix,
const xmlChar *URI)
1082 DmrppParserSax2 *parser =
static_cast<DmrppParserSax2*
>(p);
1083 const char *localname = (
const char *) l;
1085 if (parser->debug())
1086 cerr <<
"End element " << localname <<
" (state " << states[parser->get_state()] <<
")" << endl;
1088 switch (parser->get_state()) {
1090 dmr_fatal_error(parser,
"Unexpected state, inside start state while processing element '%s'.", localname);
// End of the Dataset: the state below it must be parser_start; both are
// popped and parser_end is pushed.
1093 case inside_dataset:
1094 if (is_not(localname,
"Dataset"))
1095 DmrppParserSax2::dmr_error(parser,
"Expected an end Dataset tag; found '%s' instead.", localname);
1097 parser->pop_state();
1098 if (parser->get_state() != parser_start)
1099 dmr_fatal_error(parser,
"Unexpected state, expected start state.");
1101 parser->pop_state();
1102 parser->push_state(parser_end);
1106 case inside_group: {
1107 if (is_not(localname,
"Group"))
1108 DmrppParserSax2::dmr_error(parser,
"Expected an end tag for a Group; found '%s' instead.", localname);
1110 if (!parser->empty_basetype() || parser->empty_group())
1111 DmrppParserSax2::dmr_error(parser,
1112 "The document did not contain a valid root Group or contained unbalanced tags.");
1114 parser->pop_group();
1115 parser->pop_state();
1119 case inside_attribute_container:
1120 if (is_not(localname,
"Attribute"))
1121 DmrppParserSax2::dmr_error(parser,
"Expected an end Attribute tag; found '%s' instead.", localname);
1123 parser->pop_state();
1124 parser->pop_attributes();
1127 case inside_attribute:
1128 if (is_not(localname,
"Attribute"))
1129 DmrppParserSax2::dmr_error(parser,
"Expected an end Attribute tag; found '%s' instead.", localname);
1131 parser->pop_state();
// End of a Value: the collected character data is added as a value of the
// attribute named dods_attr_name (a new D4Attribute is created and added to
// the current table along the way), then the character buffer is cleared.
1134 case inside_attribute_value: {
1135 if (is_not(localname,
"Value"))
1136 DmrppParserSax2::dmr_error(parser,
"Expected an end value tag; found '%s' instead.", localname);
1138 parser->pop_state();
1143 D4Attributes *attrs = parser->top_attributes();
1144 D4Attribute *attr = attrs->get(parser->dods_attr_name);
1146 attr =
new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1147 attrs->add_attribute_nocopy(attr);
1149 attr->add_value(parser->char_data);
1151 parser->char_data =
"";
// Inside an OtherXML attribute: an "Attribute" end tag in the root namespace
// closes the attribute and stores the collected XML text as its value; any
// other end tag is re-serialized and appended to other_xml.
1155 case inside_other_xml_attribute: {
1156 if (strcmp(localname,
"Attribute") == 0 && parser->root_ns == (
const char *) URI) {
1157 parser->pop_state();
1162 D4Attributes *attrs = parser->top_attributes();
1163 D4Attribute *attr = attrs->get(parser->dods_attr_name);
1165 attr =
new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1166 attrs->add_attribute_nocopy(attr);
1168 attr->add_value(parser->other_xml);
1170 parser->other_xml =
"";
1173 if (parser->other_xml_depth == 0) {
1174 DmrppParserSax2::dmr_error(parser,
"Expected an OtherXML attribute to end! Instead I found '%s'",
1178 parser->other_xml_depth--;
1180 parser->other_xml.append(
"</");
1182 parser->other_xml.append((
const char *) prefix);
1183 parser->other_xml.append(
":");
1185 parser->other_xml.append(localname);
1186 parser->other_xml.append(
">");
// End of an Enumeration: the finished definition is added (without copying)
// to the current group's enum definitions and the parser's pointer is cleared.
1191 case inside_enum_def:
1192 if (is_not(localname,
"Enumeration"))
1193 DmrppParserSax2::dmr_error(parser,
"Expected an end Enumeration tag; found '%s' instead.", localname);
1194 if (!parser->top_group())
1195 DmrppParserSax2::dmr_fatal_error(parser,
1196 "Expected a Group to be the current item, while finishing up an Enumeration.");
1199 parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
1202 parser->clear_enum_def();
1203 parser->pop_state();
1207 case inside_enum_const:
1208 if (is_not(localname,
"EnumConst"))
1209 DmrppParserSax2::dmr_error(parser,
"Expected an end EnumConst tag; found '%s' instead.", localname);
1211 parser->pop_state();
// End of a Dimension: the finished definition is added (without copying) to
// the current group's dimensions and the parser's pointer is cleared.
1214 case inside_dim_def: {
1215 if (is_not(localname,
"Dimension"))
1216 DmrppParserSax2::dmr_error(parser,
"Expected an end Dimension tag; found '%s' instead.", localname);
1218 if (!parser->top_group())
1219 DmrppParserSax2::dmr_error(parser,
1220 "Expected a Group to be the current item, while finishing up an Dimension.");
1222 parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
1228 parser->clear_dim_def();
1229 parser->pop_state();
// End of a simple-type variable: pop it (and its attribute table) and add it
// to its parent, which is the variable now on top of the stack or else the
// current group. For an Array parent it is added to the array's prototype.
1233 case inside_simple_type:
1234 if (is_simple_type(get_type(localname))) {
1235 BaseType *btp = parser->top_basetype();
1236 parser->pop_basetype();
1237 parser->pop_attributes();
1239 BaseType *parent = 0;
1240 if (!parser->empty_basetype())
1241 parent = parser->top_basetype();
1242 else if (!parser->empty_group())
1243 parent = parser->top_group();
1245 dmr_fatal_error(parser,
"Both the Variable and Groups stacks are empty while closing a %s element.",
1248 parser->pop_state();
1252 if (parent->type() == dods_array_c)
1253 static_cast<Array*
>(parent)->prototype()->add_var_nocopy(btp);
1255 parent->add_var_nocopy(btp);
1258 DmrppParserSax2::dmr_error(parser,
"Expected an end tag for a simple type; found '%s' instead.", localname);
1260 parser->pop_state();
1264 if (is_not(localname,
"Dim"))
1265 DmrppParserSax2::dmr_fatal_error(parser,
"Expected an end Dim tag; found '%s' instead.", localname);
1267 parser->pop_state();
1271 if (is_not(localname,
"Map"))
1272 DmrppParserSax2::dmr_fatal_error(parser,
"Expected an end Map tag; found '%s' instead.", localname);
1274 parser->pop_state();
// End of a Structure or Sequence: pop it (and its attribute table) and add it
// to its parent, found the same way as for simple types.
1277 case inside_constructor: {
1278 if (strcmp(localname,
"Structure") != 0 && strcmp(localname,
"Sequence") != 0) {
1279 DmrppParserSax2::dmr_error(parser,
"Expected an end tag for a constructor; found '%s' instead.", localname);
1283 BaseType *btp = parser->top_basetype();
1284 parser->pop_basetype();
1285 parser->pop_attributes();
1287 BaseType *parent = 0;
1288 if (!parser->empty_basetype())
1289 parent = parser->top_basetype();
1290 else if (!parser->empty_group())
1291 parent = parser->top_group();
1293 dmr_fatal_error(parser,
"Both the Variable and Groups stacks are empty while closing a %s element.",
1296 parser->pop_state();
1302 parent->add_var_nocopy(btp);
1303 parser->pop_state();
1307 case not_dap4_element:
1308 if (parser->debug()) cerr <<
"End of non DAP4 element: " << localname << endl;
1309 parser->pop_state();
1312 case inside_dmrpp_object:
1313 if (parser->debug()) cerr <<
"End of dmrpp namespace element: " << localname << endl;
1314 parser->pop_state();
// End of chunkDimensionSizes: the collected element text is passed to the
// variable on top of the stack (which must be a DmrppCommon) for parsing, and
// the character buffer is cleared.
1317 case inside_dmrpp_chunkDimensionSizes_element: {
1318 if (parser->debug()) cerr <<
"End of chunkDimensionSizes element. localname: " << localname << endl;
1320 if (is_not(localname,
"chunkDimensionSizes"))
1321 DmrppParserSax2::dmr_error(parser,
"Expected an end value tag; found '%s' instead.", localname);
1322 DmrppCommon *dc =
dynamic_cast<DmrppCommon*
>(parser->top_basetype());
1324 throw BESInternalError(
"Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
1325 string element_text(parser->char_data);
1326 if (parser->debug()) cerr <<
"chunkDimensionSizes element_text: '" << element_text <<
"'" << endl;
1327 dc->parse_chunk_dimension_sizes(element_text);
1328 parser->char_data =
"";
1329 parser->pop_state();
1333 case parser_unknown:
1334 parser->pop_state();
1338 case parser_fatal_error:
1346 if (parser->debug()) cerr <<
"End element exit state: " << states[parser->get_state()] << endl;
// SAX callback for character data.
// 'ch' is used together with 'len' (it is never treated as NUL-terminated).
// Inside an attribute Value or a chunkDimensionSizes element the text is
// appended to char_data; inside an OtherXML attribute it is appended to
// other_xml.
1352 void DmrppParserSax2::dmr_get_characters(
void * p,
const xmlChar * ch,
int len)
1356 switch (parser->get_state()) {
1357 case inside_attribute_value:
1358 case inside_dmrpp_chunkDimensionSizes_element:
1359 parser->char_data.append((
const char *) (ch), len);
1360 BESDEBUG(module,
"Characters[" << parser->char_data.size() <<
"]" << parser->char_data <<
"'" << endl);
1363 case inside_other_xml_attribute:
1364 parser->other_xml.append((
const char *) (ch), len);
1365 BESDEBUG(module,
"Other XML Characters: '" << parser->other_xml <<
"'" << endl);
1377 void DmrppParserSax2::dmr_ignoreable_whitespace(
void *p,
const xmlChar *ch,
int len)
1381 switch (parser->get_state()) {
1382 case inside_other_xml_attribute:
1383 parser->other_xml.append((
const char *) (ch), len);
1396 void DmrppParserSax2::dmr_get_cdata(
void *p,
const xmlChar *value,
int len)
1400 switch (parser->get_state()) {
1401 case inside_other_xml_attribute:
1402 parser->other_xml.append((
const char *) (value), len);
1405 case parser_unknown:
1409 DmrppParserSax2::dmr_error(parser,
"Found a CData block but none are allowed by DAP4.");
1419 xmlEntityPtr DmrppParserSax2::dmr_get_entity(
void *,
const xmlChar * name)
1421 return xmlGetPredefinedEntity(name);
1434 void DmrppParserSax2::dmr_fatal_error(
void * p,
const char *msg, ...)
1439 parser->push_state(parser_fatal_error);
1441 va_start(args, msg);
1443 vsnprintf(str, 1024, msg, args);
1446 int line = xmlSAX2GetLineNumber(parser->context);
1448 if (!parser->error_msg.empty()) parser->error_msg +=
"\n";
1449 parser->error_msg +=
"At line " + long_to_string(line) +
": " + string(str);
1452 void DmrppParserSax2::dmr_error(
void *p,
const char *msg, ...)
1457 parser->push_state(parser_error);
1459 va_start(args, msg);
1461 vsnprintf(str, 1024, msg, args);
1464 int line = xmlSAX2GetLineNumber(parser->context);
1466 if (!parser->error_msg.empty()) parser->error_msg +=
"\n";
1467 parser->error_msg +=
"At line " + long_to_string(line) +
": " + string(str);
/**
 * Release the libxml2 parser context and the variables still held on the
 * BaseType stack, then report a failed parse by throwing.
 *
 * @exception Error Thrown with the "not well formed" or "not valid" text
 * followed by error_msg, or with error_msg alone when the current state is
 * parser_error.
 * @exception InternalErr Thrown with error_msg when the current state is
 * parser_fatal_error.
 */
1474 void DmrppParserSax2::cleanup_parse()
// Copy both flags out of the context first; the context is freed just below.
1476 bool wellFormed = context->wellFormed;
1477 bool valid = context->valid;
1481 xmlFreeParserCtxt(context);
// Delete every variable that is still on the stack.
// NOTE(review): the statement that pops the stack is not visible in this
// excerpt; the loop terminates only if each pass removes the top entry - confirm.
1491 while (!btp_stack.empty()) {
1492 delete top_basetype();
// NOTE(review): the conditions guarding the next two throws are not visible
// here; presumably they test the wellFormed and valid flags saved above - confirm.
1497 throw Error(
"The DMR was not well formed. " + error_msg);
1499 throw Error(
"The DMR was not valid." + error_msg);
// Errors recorded by the SAX callbacks are reported from the final state.
1500 else if (get_state() == parser_error)
1501 throw Error(error_msg);
1502 else if (get_state() == parser_fatal_error)
throw InternalErr(error_msg);
/**
 * Parse a DMR document read from an input stream, feeding it to a libxml2
 * push parser whose SAX callbacks are taken from dmrpp_sax_parser.
 *
 * Two alternative feed sequences appear below: one hands the parser the text
 * held in \c line, the other hands it blocks read into \c d_parse_buffer.
 * NOTE(review): how the two are selected (for example by a preprocessor
 * guard) is not visible in this excerpt - confirm.
 *
 * @param f Stream to read the document from.
 * @param dest_dmr DMR object for the parse; must not be null.
 * @param debug When true, the text handed to the parser is echoed to cerr.
 * @exception Error Thrown when the stream is not good() or the first line is empty.
 * @exception InternalErr Thrown when \c dest_dmr is null.
 */
1519 void DmrppParserSax2::intern(istream &f, DMR *dest_dmr,
bool debug)
// Reject an unusable stream or a missing destination before any parsing.
1525 if (!f.good())
throw Error(
"Input stream not open or read error");
1526 if (!dest_dmr)
throw InternalErr(__FILE__, __LINE__,
"DMR object is null");
// ---- Sequence 1: feed the parser from 'line' ----
// NOTE(review): the reads that fill 'line' and update 'line_num' are not visible here.
1536 if (line.length() == 0)
throw Error(
"No input found while parsing the DMR.");
1538 if (debug) cerr <<
"line: (" << line_num <<
"): " << endl << line << endl << endl;
// Create the push parser, seeded with the first line, with 'this' as the SAX user data.
1540 context = xmlCreatePushParserCtxt(&dmrpp_sax_parser,
this, line.c_str(), line.length(),
"stream");
1541 context->validate =
true;
1542 push_state(parser_start);
1548 if (debug) cerr <<
"line: (" << line_num <<
"): " << endl << line << endl << endl;
// Keep feeding until the stream hits EOF or the parser reaches parser_end.
1550 while (!f.eof() && (get_state() != parser_end)) {
1551 xmlParseChunk(context, line.c_str(), line.length(), 0);
1555 if (debug) cerr <<
"line: (" << line_num <<
"): " << endl << line << endl << endl;
// Final call: size 0 with the terminate argument set to 1.
1559 xmlParseChunk(context, line.c_str(), 0, 1);
// ---- Sequence 2: first line as above, then fixed-size blocks from the stream ----
1566 if (line.length() == 0)
throw Error(
"No input found while parsing the DMR.");
1568 if (debug) cerr <<
"line: (" << line_num <<
"): " << endl << line << endl << endl;
1570 context = xmlCreatePushParserCtxt(&dmrpp_sax_parser,
this, line.c_str(), line.length(),
"stream");
1571 context->validate =
true;
1572 push_state(parser_start);
1575 long chunk_count = 0;
1576 long chunk_size = 0;
// Read up to D4_PARSE_BUFF_SIZE bytes; gcount() gives the number actually read.
1578 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1579 chunk_size=f.gcount();
// Terminate the block so the debug output below can print it as a C string.
// NOTE(review): after a full read this writes index D4_PARSE_BUFF_SIZE, so
// d_parse_buffer must hold at least D4_PARSE_BUFF_SIZE + 1 bytes - confirm
// against its declaration.
1580 d_parse_buffer[chunk_size]=0;
1581 if (debug) cerr <<
"chunk: (" << chunk_count++ <<
"): " << endl << d_parse_buffer << endl << endl;
// Hand over the current block, then read the next one, until EOF or parser_end.
1583 while(!f.eof() && (get_state() != parser_end)){
1585 xmlParseChunk(context, d_parse_buffer, chunk_size, 0);
1588 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1589 chunk_size=f.gcount();
1590 d_parse_buffer[chunk_size]=0;
1591 if (debug) cerr <<
"chunk: (" << chunk_count++ <<
"): " << endl << d_parse_buffer << endl << endl;
// The last block read is passed with the terminate argument set to 1.
1595 xmlParseChunk(context, d_parse_buffer, chunk_size, 1);
1615 void DmrppParserSax2::intern(
const string &document, DMR *dest_dmr,
bool debug)
1617 intern(document.c_str(), document.length(), dest_dmr, debug);
1630 void DmrppParserSax2::intern(
const char *buffer,
int size, DMR *dest_dmr,
bool debug)
1632 if (!(size > 0))
return;
1638 if (!dest_dmr)
throw InternalErr(__FILE__, __LINE__,
"DMR object is null");
1641 push_state(parser_start);
1642 context = xmlCreatePushParserCtxt(&dmrpp_sax_parser,
this, buffer, size,
"stream");
1643 context->validate =
true;
1646 xmlParseChunk(context, buffer, 0, 1);