libdap Updated for version 3.20.10
libdap4 is an implementation of OPeNDAP's DAP protocol.
D4ParserSax2.h
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2012 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#ifndef d4_parser_sax2_h
27#define d4_parser_sax2_h
28
29#define ATTR 1
30
31#include <string.h>
32
33#include <string>
34#include <iostream>
35#include <map>
36#include <stack>
37
38#include <libxml/parserInternals.h>
39
40#define CRLF "\r\n"
41#define D4_PARSE_BUFF_SIZE 1048576
42
43namespace libdap
44{
45
46class DMR;
47class BaseType;
48class D4BaseTypeFactory;
49class D4Group;
50class D4Attributes;
51class D4EnumDef;
52class D4Dimension;
53
78{
79private:
/// States the parser can be in while reading a document.
enum ParseState {
    parser_start,

    inside_dataset,

    // Entered just after the start of a Group element has been parsed.
    inside_group,

    inside_attribute_container,
    inside_attribute,
    inside_attribute_value,
    inside_other_xml_attribute,

    inside_enum_def,
    inside_enum_const,

    inside_dim_def,

    // One state for all of Byte, ..., Url, Opaque.
    inside_simple_type,

    // There is no inside_array state; it is commented out in this header.
    inside_dim,
    inside_map,

    inside_constructor,

    // inside_sequence was removed from the merged code (jhrg 5/2/14).

    not_dap4_element,

    parser_unknown,
    parser_error,
    parser_fatal_error,

    parser_end
};
120 char d_parse_buffer[D4_PARSE_BUFF_SIZE+1]; // Buff size plus one byte for NULL termination.
121
122 xmlSAXHandler d_dmr_sax_parser;
123
124 // The results of the parse operation are stored in these fields.
125 // This is passed into the parser using the intern() methods.
126 DMR *d_dmr; // dump DMR here
127 DMR *dmr() const { return d_dmr; }
128
129 // These stacks hold the state of the parse as it progresses.
130 stack<ParseState> s; // Current parse state
131 void push_state(D4ParserSax2::ParseState state) { s.push(state); }
132 D4ParserSax2::ParseState get_state() const { return s.top(); }
133 void pop_state() { s.pop(); }
134 bool empty_state() const { return s.empty(); }
135
136 stack<BaseType*> btp_stack; // current variable(s)
137 void push_basetype(BaseType *btp) { btp_stack.push(btp); }
138 BaseType *top_basetype() const { return btp_stack.top(); }
139 void pop_basetype() { btp_stack.pop(); }
140 bool empty_basetype() const { return btp_stack.empty(); }
141
142 stack<D4Group*> grp_stack; // current groups(s)
143 void push_group(D4Group *grp) { grp_stack.push(grp); }
144 D4Group *top_group() const { return grp_stack.top(); }
145 void pop_group() { grp_stack.pop(); }
146 bool empty_group() const { return grp_stack.empty(); }
147
148 stack<D4Attributes*> d_attrs_stack; // DAP4 Attributes
149 void push_attributes(D4Attributes *attr) { d_attrs_stack.push(attr); }
150 D4Attributes *top_attributes() const { return d_attrs_stack.top(); }
151 void pop_attributes() { d_attrs_stack.pop(); }
152 bool empty_attributes() const { return d_attrs_stack.empty(); }
153
154 D4EnumDef *d_enum_def;
155 D4EnumDef *enum_def();
156 void clear_enum_def() { d_enum_def = 0; }
157
158 D4Dimension *d_dim_def;
159 D4Dimension *dim_def();
160 void clear_dim_def() { d_dim_def = 0; }
161
162 // Accumulate stuff inside an 'OtherXML' DAP attribute here
163 string other_xml;
164
165 // When we're parsing unknown XML, how deeply is it nested? This is used
166 // for the OtherXML DAP attributes.
167 unsigned int other_xml_depth;
168 unsigned int unknown_depth;
169
170 // These are used for processing errors.
171 string d_error_msg; // Error message(s), if any.
172 xmlParserCtxtPtr d_context; // used for error message line numbers
173
174 // These hold temporary values read during the parse.
175 string dods_attr_name; // DAP4 attributes, not XML attributes
176 string dods_attr_type; // ... not XML ...
177 string char_data; // char data in value elements; null after use
178 string root_ns; // What is the namespace of the root node (Group)
179
180 bool d_debug;
181 bool debug() const { return d_debug; }
182
183 bool d_strict;
184
185 class XMLAttribute {
186 public:
187 string prefix;
188 string nsURI;
189 string value;
190
191 void clone(const XMLAttribute &src) {
192 prefix = src.prefix;
193 nsURI = src.nsURI;
194 value = src.value;
195 }
196
197 XMLAttribute() : prefix(""), nsURI(""), value("") {}
198 XMLAttribute(const string &p, const string &ns, const string &v)
199 : prefix(p), nsURI(ns), value(v) {}
200 // 'attributes' as passed from libxml2 is a five element array but this
201 // ctor gets the back four elements.
202 XMLAttribute(const xmlChar **attributes/*[4]*/) {
203 prefix = attributes[0] != 0 ? (const char *)attributes[0]: "";
204 nsURI = attributes[1] != 0 ? (const char *)attributes[1]: "";
205 value = string((const char *)attributes[2], (const char *)attributes[3]);
206 }
207 XMLAttribute(const XMLAttribute &rhs) {
208 clone(rhs);
209 }
210 ~XMLAttribute() {
211 }
212 XMLAttribute &operator=(const XMLAttribute &rhs) {
213 if (this == &rhs)
214 return *this;
215 clone(rhs);
216 return *this;
217 }
218 };
219
220 typedef map<string, XMLAttribute> XMLAttrMap;
221 XMLAttrMap xml_attrs; // dump XML attributes here
222
223 XMLAttrMap::iterator xml_attr_begin() { return xml_attrs.begin(); }
224
225 XMLAttrMap::iterator xml_attr_end() { return xml_attrs.end(); }
226
227 map<string, string> namespace_table;
228
229 void cleanup_parse();
230
237 void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
238 void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
239 bool check_required_attribute(const string &attr);
240 bool check_attribute(const string & attr);
241 void process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes);
242
243 void process_enum_const_helper(const xmlChar **attrs, int nb_attributes);
244 void process_enum_def_helper(const xmlChar **attrs, int nb_attributes);
245
246 bool process_dimension(const char *name, const xmlChar **attrs, int nb_attrs);
247 bool process_dimension_def(const char *name, const xmlChar **attrs, int nb_attrs);
248 bool process_map(const char *name, const xmlChar **attrs, int nb_attributes);
249 bool process_attribute(const char *name, const xmlChar **attrs, int nb_attributes);
250 bool process_variable(const char *name, const xmlChar **attrs, int nb_attributes);
251 bool process_group(const char *name, const xmlChar **attrs, int nb_attributes);
252 bool process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes);
253 bool process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes);
254
255 void finish_variable(const char *tag, Type t, const char *expected);
257
258 friend class D4ParserSax2Test;
259
260public:
261 D4ParserSax2() :
262 d_dmr(0), d_enum_def(0), d_dim_def(0),
263 other_xml(""), other_xml_depth(0), unknown_depth(0),
264 d_error_msg(""), d_context(0),
265 dods_attr_name(""), dods_attr_type(""),
266 char_data(""), root_ns(""), d_debug(false), d_strict(true)
267 {
268 //xmlSAXHandler ddx_sax_parser;
269 memset(&d_dmr_sax_parser, 0, sizeof(xmlSAXHandler));
270
271 d_dmr_sax_parser.getEntity = &D4ParserSax2::dmr_get_entity;
272 d_dmr_sax_parser.startDocument = &D4ParserSax2::dmr_start_document;
273 d_dmr_sax_parser.endDocument = &D4ParserSax2::dmr_end_document;
274 d_dmr_sax_parser.characters = &D4ParserSax2::dmr_get_characters;
275 d_dmr_sax_parser.ignorableWhitespace = &D4ParserSax2::dmr_ignoreable_whitespace;
276 d_dmr_sax_parser.cdataBlock = &D4ParserSax2::dmr_get_cdata;
277 d_dmr_sax_parser.warning = &D4ParserSax2::dmr_error;
278 d_dmr_sax_parser.error = &D4ParserSax2::dmr_error;
279 d_dmr_sax_parser.fatalError = &D4ParserSax2::dmr_fatal_error;
280 d_dmr_sax_parser.initialized = XML_SAX2_MAGIC;
281 d_dmr_sax_parser.startElementNs = &D4ParserSax2::dmr_start_element;
282 d_dmr_sax_parser.endElementNs = &D4ParserSax2::dmr_end_element;
283 }
284
285 void intern(istream &f, DMR *dest_dmr, bool debug = false);
286 // Deprecated - this does not read from a file, it parses text in the string 'document'
287 void intern(const string &document, DMR *dest_dmr, bool debug = false);
288 void intern(const char *buffer, int size, DMR *dest_dmr, bool debug = false);
289
302 void set_strict(bool s) { d_strict = s; }
306 bool get_strict() const { return d_strict; }
309 static void dmr_start_document(void *parser);
310 static void dmr_end_document(void *parser);
311
312 static void dmr_start_element(void *parser,
313 const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
314 int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
315 int nb_defaulted, const xmlChar **attributes);
316 static void dmr_end_element(void *parser, const xmlChar *localname,
317 const xmlChar *prefix, const xmlChar *URI);
318
319 static void dmr_get_characters(void *parser, const xmlChar *ch, int len);
320 static void dmr_ignoreable_whitespace(void *parser,
321 const xmlChar * ch, int len);
322 static void dmr_get_cdata(void *parser, const xmlChar *value, int len);
323
324 static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name);
325 static void dmr_fatal_error(void *parser, const char *msg, ...);
326 static void dmr_error(void *parser, const char *msg, ...);
327};
328
329} // namespace libdap
330
331#endif // d4_parser_sax2_h
The basic data type for the DODS DAP types.
Definition BaseType.h:118
void intern(istream &f, DMR *dest_dmr, bool debug=false)
bool get_strict() const
Get the setting of the 'strict' mode.
static void dmr_start_document(void *parser)
static void dmr_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes)
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
static void dmr_end_document(void *parser)
void set_strict(bool s)
Set the 'strict' mode to true or false.
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
static void dmr_fatal_error(void *parser, const char *msg,...)
Top-level DAP object to house generic methods.
Type
Identifies the data type.
Definition Type.h:94