libdap Updated for version 3.20.10
libdap4 is an implementation of OPeNDAP's DAP protocol.
parser-util.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26// (c) COPYRIGHT URI/MIT 1995-1999
27// Please read the full copyright statement in the file COPYRIGHT_URI.
28//
29// Authors:
30// jhrg,jimg James Gallagher <jgallagher@gso.uri.edu>
31
32// These functions are utility functions used by the various DAP parsers (the
33// DAS, DDS and constraint expression parsers).
34// jhrg 9/7/95
35
36#include "config.h"
37
38#include <cerrno>
39#include <cassert>
40#include <cstring>
41#include <cmath>
42#include <cstdlib>
43
44#include <iostream>
45#include <sstream>
46
47// We wrap VC++ 6.x strtod() to account for a shortcoming
48// in that function with regard to "NaN".
49#ifdef WIN32
50#include <limits>
51double w32strtod(const char *, char **);
52#endif
53
54#include "Error.h"
55#include "debug.h"
56#include "parser.h" // defines constants such as ID_MAX
57#include "dods-limits.h"
58#include "util.h" // Jose Garcia: for append_long_to_string.
59
60using std::cerr;
61using std::endl;
62
63#ifdef WIN32
64// VC++ 6.x strtod() doesn't recognize "NaN". Account for it
65// by wrapping it around a check for the Nan string. Use of
66// the product is obsolete as of 1/2007, but it is unknown if
67// the issue is still there in later releases of that product.
68// ROM - 01/2007
69double w32strtod(const char *val, char **ptr)
70{
71 // Convert the two char arrays to compare to strings.
72 string *sval = new string(val);
73 string *snan = new string("NaN");
74
75 // If val doesn't contain "NaN|Nan|nan|etc", use strtod as
76 // provided.
77 if (stricmp(sval->c_str(), snan->c_str()) != 0)
78 return (strtod(val, ptr));
79
80 // But if it does, return the bit pattern for Nan and point
81 // the parsing ptr arg at the trailing '\0'.
82 *ptr = (char *) val + strlen(val);
83 return (std::numeric_limits < double >::quiet_NaN());
84}
85#endif
86
87namespace libdap {
88
89// Deprecated, but still used by the HDF4 EOS server code.
90void
91parse_error(parser_arg * arg, const char *msg, const int line_num,
92 const char *context)
93{
94 // Jose Garcia
95 // This assert(s) is (are) only for developing purposes
96 // For production servers remove it by compiling with NDEBUG
97 assert(arg);
98 assert(msg);
99
100 arg->set_status(FALSE);
101
102 string oss = "";
103
104 if (line_num != 0) {
105 oss += "Error parsing the text on line ";
106 append_long_to_string(line_num, 10, oss);
107 }
108 else {
109 oss += "Parse error.";
110 }
111
112 if (context)
113 oss += (string) " at or near: " + context + (string) "\n" + msg
114 + (string) "\n";
115 else
116 oss += (string) "\n" + msg + (string) "\n";
117
118 arg->set_error(new Error(unknown_error, oss));
119}
120
121void
122parse_error(const char *msg, const int line_num, const char *context)
123{
124 // Jose Garcia
125 // This assert(s) is (are) only for developing purposes
126 // For production servers remove it by compiling with NDEBUG
127 assert(msg);
128
129 string oss = "";
130
131 if (line_num != 0) {
132 oss += "Error parsing the text on line ";
133 append_long_to_string(line_num, 10, oss);
134 }
135 else {
136 oss += "Parse error.";
137 }
138
139 if (context)
140 oss += (string) " at or near: " + context + (string) "\n" + msg
141 + (string) "\n";
142 else
143 oss += (string) "\n" + msg + (string) "\n";
144
145 throw Error(malformed_expr, oss);
146}
147
// Convenience overload for callers that hold the message in a string; it
// forwards to the const char * version. The context stays a char * because
// that is what the parsers supply, and will be unless they change
// dramatically.
void
parse_error(const std::string & msg, const int line_num, const char *context)
{
    const char *text = msg.c_str();
    parse_error(text, line_num, context);
}
155
156#if 0
157// Remove this since it is not used and contains a potential (low level) vulnerability.
158// jhrg 3/7/22
// Legacy fixed-size-buffer variant; it is compiled out by the enclosing
// #if 0. A word of ID_MAX or more characters is reported through
// parse_error(); the copy below uses strncpy() bounded by ID_MAX and then
// forces dst[ID_MAX - 1] to NUL.
159void save_str(char *dst, const char *src, const int line_num)
160{
161 if (strlen(src) >= ID_MAX)
162 parse_error(string("The word `") + string(src)
163 + string("' is too long (it should be no longer than ")
164 + long_to_string(ID_MAX) + string(")."), line_num);
165
166 strncpy(dst, src, ID_MAX);
167 dst[ID_MAX - 1] = '\0'; /* in case ... */
168}
169#endif
170
// Save a string to a temporary variable during the parse.
//
// @param dst Receives a copy of src; any previous content is replaced.
// @param src NUL-terminated text to copy.
// The unnamed third parameter (a line number) is accepted but not used.
void save_str(std::string & dst, const char *src, const int)
{
    dst.assign(src);
}
175
176bool is_keyword(string id, const string & keyword)
177{
178 downcase(id);
179 id = prune_spaces(id);
180 DBG(cerr << "is_keyword: " << keyword << " = " << id << endl);
181 return id == keyword;
182}
183
194int check_byte(const char *val)
195{
196 char *ptr;
197 long v = strtol(val, &ptr, 0);
198
199 if ((v == 0 && val == ptr) || *ptr != '\0') {
200 return FALSE;
201 }
202
203 DBG(cerr << "v: " << v << endl);
204
205 // We're very liberal here with values. Anything that can fit into 8 bits
206 // is allowed through. Clients will have to deal with the fact that the
207 // ASCII representation for the value might need to be tweaked. This is
208 // especially the case for Java clients where Byte datatypes are
209 // signed. 3/20/2000 jhrg
210 if ((v < 0 && v < DODS_SCHAR_MIN)
211 || (v > 0 && static_cast < unsigned long >(v) > DODS_UCHAR_MAX))
212 return FALSE;
213
214 return TRUE;
215}
216
217// This version of check_int will pass base 8, 10 and 16 numbers when they
218// use the ANSI standard for string representation of those number bases.
219
220int check_int16(const char *val)
221{
222 char *ptr;
223 long v = strtol(val, &ptr, 0); // `0' --> use val to determine base
224
225 if ((v == 0 && val == ptr) || *ptr != '\0') {
226 return FALSE;
227 }
228 // Don't use the constant from limits.h, use the ones in dods-limits.h
229 if (v > DODS_SHRT_MAX || v < DODS_SHRT_MIN) {
230 return FALSE;
231 }
232
233 return TRUE;
234}
235
236int check_uint16(const char *val)
237{
238 char *ptr;
239 unsigned long v = strtol(val, &ptr, 0);
240
241 if ((v == 0 && val == ptr) || *ptr != '\0') {
242 return FALSE;
243 }
244
245 if (v > DODS_USHRT_MAX) {
246 return FALSE;
247 }
248
249 return TRUE;
250}
251
252int check_int32(const char *val)
253{
254 char *ptr;
255 errno = 0;
256 long v = strtol(val, &ptr, 0); // `0' --> use val to determine base
257
258 if ((v == 0 && val == ptr) || *ptr != '\0') {
259 return FALSE;
260 }
261
262 // We need to check errno since strtol return clamps on overflow so the
263 // check against the DODS values below will always pass, even for out of
264 // bounds values in the string. mjohnson 7/20/09
265 if (errno == ERANGE) {
266 return FALSE;
267 }
268 // This could be combined with the above, or course, but I'm making it
269 // separate to highlight the test. On 64-bit linux boxes 'long' may be
270 // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
271 else if (v > DODS_INT_MAX || v < DODS_INT_MIN) {
272 return FALSE;
273 }
274 else {
275 return TRUE;
276 }
277}
278
279int check_uint32(const char *val)
280{
281 // Eat whitespace and check for an initial '-' sign...
282 // strtoul allows an initial minus. mjohnson
283 const char* c = val;
284 while (c && isspace(*c)) {
285 c++;
286 }
287 if (c && (*c == '-')) {
288 return FALSE;
289 }
290
291 char *ptr;
292 errno = 0;
293 unsigned long v = strtoul(val, &ptr, 0);
294
295 if ((v == 0 && val == ptr) || *ptr != '\0') {
296 return FALSE;
297 }
298
299 // check overflow first, or the below check is invalid due to
300 // clamping to the maximum value by strtoul
301 // maybe consider using long long for these checks? mjohnson
302 if (errno == ERANGE) {
303 return FALSE;
304 }
305 // See above.
306 else if (v > DODS_UINT_MAX) {
307 return FALSE;
308 }
309 else {
310 return TRUE;
311 }
312}
313
314int check_int32(const char *val, int &v)
315{
316 char *ptr;
317 errno = 0;
318 long tmp = strtol(val, &ptr, 0); // `0' --> use val to determine base
319
320 if ((tmp == 0 && val == ptr) || *ptr != '\0') {
321 return FALSE;
322 }
323
324 // We need to check errno since strtol return clamps on overflow so the
325 // check against the DODS values below will always pass, even for out of
326 // bounds values in the string. mjohnson 7/20/09
327 if (errno == ERANGE) {
328 return FALSE;
329 }
330 // This could be combined with the above, or course, but I'm making it
331 // separate to highlight the test. On 64-bit linux boxes 'long' may be
332 // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
333 else if (tmp > DODS_INT_MAX || tmp < DODS_INT_MIN) {
334 return FALSE;
335 }
336 else {
337 v = (int)tmp;
338 return TRUE;
339 }
340}
341
342int check_uint32(const char *val, unsigned int &v)
343{
344 // Eat whitespace and check for an initial '-' sign...
345 // strtoul allows an initial minus. mjohnson
346 const char* c = val;
347 while (c && isspace(*c)) {
348 c++;
349 }
350 if (c && (*c == '-')) {
351 return FALSE;
352 }
353
354 char *ptr;
355 errno = 0;
356 unsigned long tmp = strtoul(val, &ptr, 0);
357
358 if ((tmp == 0 && val == ptr) || *ptr != '\0') {
359 return FALSE;
360 }
361
362 // check overflow first, or the below check is invalid due to
363 // clamping to the maximum value by strtoul
364 // maybe consider using long long for these checks? mjohnson
365 if (errno == ERANGE) {
366 return FALSE;
367 }
368 // See above.
369 else if (tmp > DODS_UINT_MAX) {
370 return FALSE;
371 }
372 else {
373 v = (unsigned int)tmp;
374 return TRUE;
375 }
376}
377
378int check_int64(const char *val)
379{
380 char *ptr;
381 errno = 0;
382 long long v = strtoll(val, &ptr, 0); // `0' --> use val to determine base
383
384 if ((v == 0 && val == ptr) || *ptr != '\0') {
385 return FALSE;
386 }
387
388 // We need to check errno since strtol return clamps on overflow so the
389 // check against the DODS values below will always pass, even for out of
390 // bounds values in the string. mjohnson 7/20/09
391 if (errno == ERANGE) {
392 return FALSE;
393 }
394#if 0
395 // This could be combined with the above, or course, but I'm making it
396 // separate to highlight the test. On 64-bit linux boxes 'long' may be
397 // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
398 //
399 // Removed - Coverity says it can never be false. Makes sense. jhrg 5/10/16
400 else if (v <= DODS_LLONG_MAX && v >= DODS_LLONG_MIN) {
401 return FALSE;
402 }
403#endif
404 else {
405 return TRUE;
406 }
407}
408
409int check_uint64(const char *val)
410{
411 // Eat whitespace and check for an initial '-' sign...
412 // strtoul allows an initial minus. mjohnson
413 const char* c = val;
414 while (c && isspace(*c)) {
415 c++;
416 }
417 if (c && (*c == '-')) {
418 return FALSE;
419 }
420
421 char *ptr;
422 errno = 0;
423 unsigned long long v = strtoull(val, &ptr, 0);
424
425 if ((v == 0 && val == ptr) || *ptr != '\0') {
426 return FALSE;
427 }
428
429 if (errno == ERANGE) {
430 return FALSE;
431 }
432 else if (v > DODS_ULLONG_MAX) { // 2^61
433 return FALSE;
434 }
435 else {
436 return v;
437 }
438}
439
440// Check first for system errors (like numbers so small they convert
441// (erroneously) to zero). Then make sure that the value is within
442// limits.
443
444int check_float32(const char *val)
445{
446 char *ptr;
447 errno = 0; // Clear previous value. Fix for the 64bit
448 // IRIX from Rob Morris. 5/21/2001 jhrg
449
450#ifdef WIN32
451 double v = w32strtod(val, &ptr);
452#else
453 double v = strtod(val, &ptr);
454#endif
455
456 DBG(cerr << "v: " << v << ", ptr: " << ptr
457 << ", errno: " << errno << ", val==ptr: " << (val == ptr) << endl);
458
459 if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
460 return FALSE;
461
462#if 0
463 if ((v == 0.0 && (val == ptr || errno == HUGE_VAL || errno == ERANGE))
464 || *ptr != '\0') {
465 return FALSE;
466 }
467#endif
468
469 DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
470 double abs_val = fabs(v);
471 if (abs_val > DODS_FLT_MAX
472 || (abs_val != 0.0 && abs_val < DODS_FLT_MIN))
473 return FALSE;
474
475 return TRUE;
476}
477
478int check_float64(const char *val)
479{
480 DBG(cerr << "val: " << val << endl);
481 char *ptr;
482 errno = 0; // Clear previous value. 5/21/2001 jhrg
483
484#ifdef WIN32
485 double v = w32strtod(val, &ptr);
486#else
487 double v = strtod(val, &ptr);
488#endif
489
490 DBG(cerr << "v: " << v << ", ptr: " << ptr
491 << ", errno: " << errno << ", val==ptr: " << (val == ptr) << endl);
492
493
494 if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
495 return FALSE;
496#if 0
497 if ((v == 0.0 && (val == ptr || errno == HUGE_VAL || errno == ERANGE))
498 || *ptr != '\0') {
499 return FALSE;
500 }
501#endif
502 DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
503 double abs_val = fabs(v);
504 if (abs_val > DODS_DBL_MAX
505 || (abs_val != 0.0 && abs_val < DODS_DBL_MIN))
506 return FALSE;
507
508 return TRUE;
509}
510
511int check_float64(const char *val, double &v)
512{
513 DBG(cerr << "val: " << val << endl);
514 char *ptr;
515 errno = 0; // Clear previous value. 5/21/2001 jhrg
516
517#ifdef WIN32
518 v = w32strtod(val, &ptr);
519#else
520 v = strtod(val, &ptr);
521#endif
522
523 DBG(cerr << "v: " << v << ", ptr: " << ptr
524 << ", errno: " << errno << ", val==ptr: " << (val == ptr) << endl);
525
526
527 if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
528 return FALSE;
529#if 0
530 if ((v == 0.0 && (val == ptr || errno == HUGE_VAL || errno == ERANGE))
531 || *ptr != '\0') {
532 return FALSE;
533 }
534#endif
535 DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
536 double abs_val = fabs(v);
537 if (abs_val > DODS_DBL_MAX
538 || (abs_val != 0.0 && abs_val < DODS_DBL_MIN))
539 return FALSE;
540
541 return TRUE;
542}
543
544long long get_int64(const char *val)
545{
546 char *ptr;
547 errno = 0;
548 long long v = strtoll(val, &ptr, 0); // `0' --> use val to determine base
549
550 if ((v == 0 && val == ptr) || *ptr != '\0') {
551 throw Error("Expected a 64-bit integer, but found other characters.");
552 // The value '" + string(val) + "' contains extra characters.");
553 }
554
555 // We need to check errno since strtol return clamps on overflow so the
556 // check against the DODS values below will always pass, even for out of
557 // bounds values in the string. mjohnson 7/20/09
558 if (errno == ERANGE) {
559 throw Error("The 64-bit integer value is out of range.");
560 }
561
562#if 0
563 // This could be combined with the above, or course, but I'm making it
564 // separate to highlight the test. On 64-bit linux boxes 'long' may be
565 // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
566 //
567 // Removed because coverity flags it as useless, which it is until we
568 // have 128-bit ints... jhrg 5/9/16
569 else if (v > DODS_LLONG_MAX || v < DODS_LLONG_MIN) {
570 throw Error("The value '" + string(val) + "' is out of range.");
571 }
572#endif
573
574 else {
575 return v;
576 }
577}
578
579unsigned long long get_uint64(const char *val)
580{
581 // Eat whitespace and check for an initial '-' sign...
582 // strtoul allows an initial minus. mjohnson
583 const char* c = val;
584 while (c && isspace(*c)) {
585 c++;
586 }
587 if (c && (*c == '-')) {
588 throw Error("Expected a valid array index.");
589 }
590
591 char *ptr;
592 errno = 0;
593 unsigned long long v = strtoull(val, &ptr, 0);
594
595 if ((v == 0 && val == ptr) || *ptr != '\0') {
596 throw Error("Expected an unsigned 64-bit integer, but found other characters.");
597 }
598
599 if (errno == ERANGE) {
600 throw Error("The 64-bit integer value is out of range.");
601 }
602#if 0
603 // Coverity; see above. jhrg 5/9/16
604 else if (v > DODS_MAX_ARRAY_INDEX) { // 2^61
605 throw Error("The value '" + string(val) + "' is out of range.");
606 }
607#endif
608 else {
609 return v;
610 }
611}
612
613int get_int32(const char *val)
614{
615 char *ptr;
616 errno = 0;
617 int v = strtol(val, &ptr, 0); // `0' --> use val to determine base
618
619 if ((v == 0 && val == ptr) || *ptr != '\0') {
620 throw Error("Expected a 32-bit integer, but found other characters.");
621 }
622
623 // We need to check errno since strtol return clamps on overflow so the
624 // check against the DODS values below will always pass, even for out of
625 // bounds values in the string. mjohnson 7/20/09
626 if (errno == ERANGE) {
627 throw Error("The 32-bit integer value is out of range.");
628 }
629 // This could be combined with the above, or course, but I'm making it
630 // separate to highlight the test. On 64-bit linux boxes 'long' may be
631 // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
632 else if (v > DODS_INT_MAX || v < DODS_INT_MIN) {
633 return FALSE;
634 }
635
636 else {
637 return v;
638 }
639}
640
641unsigned int get_uint32(const char *val)
642{
643 // Eat whitespace and check for an initial '-' sign...
644 // strtoul allows an initial minus. mjohnson
645 const char* c = val;
646 while (c && isspace(*c)) {
647 c++;
648 }
649 if (c && (*c == '-')) {
650 throw Error("Expected an unsigned 32-bit integer, but found other characters.");
651 }
652
653 char *ptr;
654 errno = 0;
655 unsigned int v = strtoul(val, &ptr, 0);
656
657 if ((v == 0 && val == ptr) || *ptr != '\0') {
658 throw Error("Expected an unsigned 32-bit integer, but found other characters.");
659 }
660
661 if (errno == ERANGE) {
662 throw Error("The 32-bit integer value is out of range.");
663 }
664 // See above.
665 else if (v > DODS_UINT_MAX) {
666 return FALSE;
667 }
668 else {
669 return v;
670 }
671}
672
673double get_float64(const char *val)
674{
675 DBG(cerr << "val: " << val << endl);
676 char *ptr;
677 errno = 0; // Clear previous value. 5/21/2001 jhrg
678
679#ifdef WIN32
680 double v = w32strtod(val, &ptr);
681#else
682 double v = strtod(val, &ptr);
683#endif
684
685 if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
686 throw Error("The 64-bit floating point value is out of range.");;
687
688 DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
689 double abs_val = fabs(v);
690 if (abs_val > DODS_DBL_MAX || (abs_val != 0.0 && abs_val < DODS_DBL_MIN))
691 throw Error("The 64-bit floating point value is out of range.");;
692
693 return v;
694}
695
696/*
697 Maybe someday we will really check the Urls to see if they are valid...
698*/
699
700int check_url(const char *)
701{
702 return TRUE;
703}
704
705} // namespace libdap
int check_url(const char *)
Is the value a valid URL?
int check_byte(const char *val)
Is the value a valid byte?
top level DAP object to house generic methods
string prune_spaces(const string &name)
Definition util.cc:459
void save_str(string &dst, const char *src, const int)
Save a string to a temporary variable during the parse.
void downcase(string &s)
Definition util.cc:566