libdap++  Updated for version 3.13.3
HTTPConnect.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 #include "config.h"
28 
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32 
33 #include <sys/stat.h>
34 
35 #ifdef WIN32
36 #include <io.h>
37 #endif
38 
39 #include <string>
40 #include <vector>
41 #include <functional>
42 #include <algorithm>
43 #include <sstream>
44 #include <iterator>
45 #include <cstdlib>
46 #include <cstring>
47 
48 //#define DODS_DEBUG2
49 //#define HTTP_TRACE
50 //#define DODS_DEBUG
51 
52 #undef USE_GETENV
53 
54 
55 #include "debug.h"
56 #include "mime_util.h"
57 #include "GNURegex.h"
58 #include "HTTPCache.h"
59 #include "HTTPConnect.h"
60 #include "RCReader.h"
61 #include "HTTPResponse.h"
62 #include "HTTPCacheResponse.h"
63 
64 using namespace std;
65 
66 namespace libdap {
67 
68 // These global variables are not MT-Safe, but I'm leaving them as is because
69 // they are used only for debugging (set them in a debugger like gdb or ddd).
70 // They are not static because I think that many debuggers cannot access
71 // static variables. 08/07/02 jhrg
72 
73 // Set this to 1 to turn on libcurl's verbose mode (for debugging).
// HTTPConnect::www_lib_init() tests this flag while configuring its libcurl
// handle: when it is non-zero the libcurl version string is written to
// stderr, CURLOPT_VERBOSE is enabled and curl_debug() is installed as the
// handle's debug callback.
74 int www_trace = 0;
75 
76 // Keep the temporary files; useful for debugging.
78 
// Inclusive range of HTTP 4xx status codes that have a canned message.
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417

// Messages for client (4xx) errors; entry i describes status
// CLIENT_ERR_MIN + i.
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
    "Bad Request:",
    "Unauthorized: Contact the server administrator.",
    "Payment Required.",
    "Forbidden: Contact the server administrator.",
    "Not Found: The data source or server could not be found.\n"
    " Often this means that the OPeNDAP server is missing or needs attention;\n"
    " Please contact the server administrator.",
    "Method Not Allowed.",
    "Not Acceptable.",
    "Proxy Authentication Required.",
    "Request Time-out.",
    "Conflict.",
    "Gone:.",
    "Length Required.",
    "Precondition Failed.",
    "Request Entity Too Large.",
    "Request URI Too Large.",
    "Unsupported Media Type.",
    "Requested Range Not Satisfiable.",
    "Expectation Failed."
};

// Inclusive range of HTTP 5xx status codes that have a canned message.
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505

// Messages for server (5xx) errors; entry i describes status
// SERVER_ERR_MIN + i.
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
    "Internal Server Error.",
    "Not Implemented.",
    "Bad Gateway.",
    "Service Unavailable.",
    "Gateway Time-out.",
    "HTTP Version Not Supported."
};

/** Translate an HTTP status code into a human readable message.

    @param status The HTTP status code.
    @return The canned message for codes in [CLIENT_ERR_MIN, CLIENT_ERR_MAX]
    or [SERVER_ERR_MIN, SERVER_ERR_MAX]; a generic "Unknown Error" message
    for every other value. */
static string
http_status_to_string(int status)
{
    // Start from the fallback and overwrite it when the code is in a table.
    const char *text =
        "Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.";

    if (CLIENT_ERR_MIN <= status && status <= CLIENT_ERR_MAX)
        text = http_client_errors[status - CLIENT_ERR_MIN];
    else if (SERVER_ERR_MIN <= status && status <= SERVER_ERR_MAX)
        text = http_server_errors[status - SERVER_ERR_MIN];

    return string(text);
}
129 
134 class ParseHeader : public unary_function<const string &, void>
135 {
136  ObjectType type; // What type of object is in the stream?
137  string server; // Server's version string.
138  string protocol; // Server's protocol version.
139  string location; // Url returned by server
140 
141 public:
142  ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
143  { }
144 
145  void operator()(const string &line)
146  {
147  string name, value;
148  parse_mime_header(line, name, value);
149  if (name == "content-description") {
150  DBG2(cerr << name << ": " << value << endl);
151  type = get_description_type(value);
152  }
153  // The second test (== "dods/0.0") tests if xopendap-server has already
154  // been seen. If so, use that header in preference to the old
155  // XDODS-Server header. jhrg 2/7/06
156  else if (name == "xdods-server" && server == "dods/0.0") {
157  DBG2(cerr << name << ": " << value << endl);
158  server = value;
159  }
160  else if (name == "xopendap-server") {
161  DBG2(cerr << name << ": " << value << endl);
162  server = value;
163  }
164  else if (name == "xdap") {
165  DBG2(cerr << name << ": " << value << endl);
166  protocol = value;
167  }
168  else if (server == "dods/0.0" && name == "server") {
169  DBG2(cerr << name << ": " << value << endl);
170  server = value;
171  }
172  else if (name == "location") {
173  DBG2(cerr << name << ": " << value << endl);
174  location = value;
175  }
176  else if (type == unknown_type && name == "content-type"
177  && line.find("text/html") != string::npos) {
178  DBG2(cerr << name << ": text/html..." << endl);
179  type = web_error;
180  }
181  }
182 
183  ObjectType get_object_type()
184  {
185  return type;
186  }
187 
188  string get_server()
189  {
190  return server;
191  }
192 
193  string get_protocol()
194  {
195  return protocol;
196  }
197 
198  string get_location() {
199  return location;
200  }
201 };
202 
/** libcurl header callback: store one response header line.

    libcurl calls this once per header line it receives. The line terminator
    (a trailing "\n" or "\r\n") is removed and the line is appended to the
    vector passed through \c resp_hdrs, unless the line is empty or is an
    HTTP status line (i.e. it starts with "HTTP/").

    @param ptr Start of the header bytes; not null terminated.
    @param size Size of one element.
    @param nmemb Number of elements; the line is size * nmemb bytes long.
    @param resp_hdrs A vector<string> * that receives the header; when null
    the line is consumed but not stored.
    @return The number of bytes consumed, always size * nmemb so that
    libcurl continues the transfer. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(cerr << "Inside the header parser." << endl);
    vector<string> *hdrs = static_cast<vector<string> *>(resp_hdrs);

    const size_t total = size * nmemb;
    const char *data = static_cast<const char *>(ptr);

    // Nothing to record; also avoids computing 'total - 1' on an empty
    // buffer, which would wrap around.
    if (!hdrs || !data || total == 0)
        return total;

    // Grab the header, minus the trailing newline. Or \r\n pair. Only bytes
    // that really are line terminators are dropped.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const string complete_line(data, length);

    // Store all non-empty headers that are not HTTP status lines. Only a
    // leading "HTTP/" marks a status line; a header that merely mentions
    // HTTP in its value is kept.
    if (!complete_line.empty() && complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(cerr << "Header line: " << complete_line << endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
240 
242 static int
243 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
244 {
245  string message(msg, size);
246 
247  switch (info) {
248  case CURLINFO_TEXT:
249  cerr << "Text: " << message; break;
250  case CURLINFO_HEADER_IN:
251  cerr << "Header in: " << message; break;
252  case CURLINFO_HEADER_OUT:
253  cerr << "Header out: " << message; break;
254  case CURLINFO_DATA_IN:
255  cerr << "Data in: " << message; break;
256  case CURLINFO_DATA_OUT:
257  cerr << "Data out: " << message; break;
258  case CURLINFO_END:
259  cerr << "End: " << message; break;
260 #ifdef CURLINFO_SSL_DATA_IN
261  case CURLINFO_SSL_DATA_IN:
262  cerr << "SSL Data in: " << message; break;
263 #endif
264 #ifdef CURLINFO_SSL_DATA_OUT
265  case CURLINFO_SSL_DATA_OUT:
266  cerr << "SSL Data out: " << message; break;
267 #endif
268  default:
269  cerr << "Curl info: " << message; break;
270  }
271  return 0;
272 }
273 
277 void
278 HTTPConnect::www_lib_init()
279 {
280  d_curl = curl_easy_init();
281  if (!d_curl)
282  throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
283 
284  // Now set options that will remain constant for the duration of this
285  // CURL object.
286 
287  // Set the proxy host.
288  if (!d_rcr->get_proxy_server_host().empty()) {
289  DBG(cerr << "Setting up a proxy server." << endl);
290  DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
291  << endl);
292  DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
293  << endl);
294  DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
295  << endl);
296  curl_easy_setopt(d_curl, CURLOPT_PROXY,
297  d_rcr->get_proxy_server_host().c_str());
298  curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
299  d_rcr->get_proxy_server_port());
300 
301  // As of 4/21/08 only NTLM, Digest and Basic work.
302 #ifdef CURLOPT_PROXYAUTH
303  curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
304 #endif
305 
306  // Password might not be required. 06/21/04 jhrg
307  if (!d_rcr->get_proxy_server_userpw().empty())
308  curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
309  d_rcr->get_proxy_server_userpw().c_str());
310  }
311 
312  curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
313  // We have to set FailOnError to false for any of the non-Basic
314  // authentication schemes to work. 07/28/03 jhrg
315  curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
316 
317  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
318  // choosing the the 'safest' one supported by the server.
319  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
320  curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
321 
322  curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
323  curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
324  curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
325  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
326  // param of save_raw_http_headers to a vector<string> object.
327 
328  // Follow 302 (redirect) responses
329  curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
330  curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
331 
332  // If the user turns off SSL validation...
333  if (!d_rcr->get_validate_ssl() == 0) {
334  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
335  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
336  }
337 
338  // Look to see if cookies are turned on in the .dodsrc file. If so,
339  // activate here. We honor 'session cookies' (cookies without an
340  // expiration date) here so that session-base SSO systems will work as
341  // expected.
342  if (!d_cookie_jar.empty()) {
343  DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
344  curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
345  curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
346  }
347 
348  if (www_trace) {
349  cerr << "Curl version: " << curl_version() << endl;
350  curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
351  curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
352  }
353 }
354 
358 class BuildHeaders : public unary_function<const string &, void>
359 {
360  struct curl_slist *d_cl;
361 
362 public:
363  BuildHeaders() : d_cl(0)
364  {}
365 
366  void operator()(const string &header)
367  {
368  DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
369  << endl);
370  d_cl = curl_slist_append(d_cl, header.c_str());
371  }
372 
373  struct curl_slist *get_headers()
374  {
375  return d_cl;
376  }
377 };
378 
393 long
394 HTTPConnect::read_url(const string &url, FILE *stream,
395  vector<string> *resp_hdrs,
396  const vector<string> *headers)
397 {
398  curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
399 
400 #ifdef WIN32
401  // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
402  // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
403  // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
404  // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
405  // this issue is that one should not pass a FILE * to a windows DLL. Close
406  // inspection of libcurl yields that their default write function when using
407  // the CURLOPT_WRITEDATA is just "fwrite".
408  curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
409  curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
410 #else
411  curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
412 #endif
413 
414  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
415  ostream_iterator<string>(cerr, "\n")));
416 
417  BuildHeaders req_hdrs;
418  req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
419  req_hdrs);
420  if (headers)
421  req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
422  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
423 
424  // Turn off the proxy for this URL?
425  bool temporary_proxy = false;
426  if ((temporary_proxy = url_uses_no_proxy_for(url))) {
427  DBG(cerr << "Suppress proxy for url: " << url << endl);
428  curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
429  }
430 
431  string::size_type at_sign = url.find('@');
432  // Assume username:password present *and* assume it's an HTTP URL; it *is*
433  // HTTPConnect, after all. 7 is position after "http://"; the second arg
434  // to substr() is the sub string length.
435  if (at_sign != url.npos)
436  d_upstring = url.substr(7, at_sign - 7);
437 
438  if (!d_upstring.empty())
439  curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
440 
441  // Pass save_raw_http_headers() a pointer to the vector<string> where the
442  // response headers may be stored. Callers can use the resp_hdrs
443  // value/result parameter to get the raw response header information .
444  curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
445 
446  // This is the call that causes curl to go and get the remote resource and "write it down"
447  // utilizing the configuration state that has been previously conditioned by various perturbations
448  // of calls to curl_easy_setopt().
449  CURLcode res = curl_easy_perform(d_curl);
450 
451  // Free the header list and null the value in d_curl.
452  curl_slist_free_all(req_hdrs.get_headers());
453  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
454 
455  // Reset the proxy?
456  if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
457  curl_easy_setopt(d_curl, CURLOPT_PROXY,
458  d_rcr->get_proxy_server_host().c_str());
459 
460  if (res != 0)
461  throw Error(d_error_buffer);
462 
463  long status;
464  res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
465  if (res != 0)
466  throw Error(d_error_buffer);
467 
468  return status;
469 }
470 
474 bool
475 HTTPConnect::url_uses_proxy_for(const string &url) throw()
476 {
477  if (d_rcr->is_proxy_for_used()) {
478  Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
479  int index = 0, matchlen;
480  return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
481  }
482 
483  return false;
484 }
485 
489 bool
490 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
491 {
492  return d_rcr->is_no_proxy_for_used()
493  && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
494 }
495 
496 // Public methods. Mostly...
497 
504 HTTPConnect::HTTPConnect(RCReader *rcr) : d_username(""), d_password(""),
505  d_cookie_jar(""),
506  d_dap_client_protocol_major(2),
507  d_dap_client_protocol_minor(0)
508 
509 {
510  d_accept_deflate = rcr->get_deflate();
511  d_rcr = rcr;
512 
513  // Load in the default headers to send with a request. The empty Pragma
514  // headers overrides libcurl's default Pragma: no-cache header (which
515  // will disable caching by Squid, et c.). The User-Agent header helps
516  // make server logs more readable. 05/05/03 jhrg
517  d_request_headers.push_back(string("Pragma:"));
518  string user_agent = string("User-Agent: ") + string(CNAME)
519  + string("/") + string(CVER);
520  d_request_headers.push_back(user_agent);
521  if (d_accept_deflate)
522  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
523 
524  // HTTPCache::instance returns a valid ptr or 0.
525  if (d_rcr->get_use_cache())
526  d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
527  else
528  d_http_cache = 0;
529 
530  DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
531  << ")" << endl);
532 
533  if (d_http_cache) {
534  d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
535  d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
536  d_http_cache->set_max_size(d_rcr->get_max_cache_size());
537  d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
538  d_http_cache->set_default_expiration(d_rcr->get_default_expires());
539  d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
540  }
541 
542  d_cookie_jar = rcr->get_cookie_jar();
543 
544  www_lib_init(); // This may throw either Error or InternalErr
545 }
546 
548 {
549  DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
550 
551  curl_easy_cleanup(d_curl);
552 
553  DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
554 }
555 
568 HTTPResponse *
569 HTTPConnect::fetch_url(const string &url)
570 {
571 #ifdef HTTP_TRACE
572  cout << "GET " << url << " HTTP/1.0" << endl;
573 #endif
574 
575  HTTPResponse *stream;
576 
577  if (d_http_cache && d_http_cache->is_cache_enabled()) {
578  stream = caching_fetch_url(url);
579  }
580  else {
581  stream = plain_fetch_url(url);
582  }
583 
584 #ifdef HTTP_TRACE
585  stringstream ss;
586  ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
587  for (size_t i = 0; i < stream->get_headers()->size(); i++) {
588  ss << stream->get_headers()->at(i) << endl;
589  }
590  cout << ss.str();
591 #endif
592 
593  ParseHeader parser;
594 
595  parser = for_each(stream->get_headers()->begin(),
596  stream->get_headers()->end(), ParseHeader());
597 
598 #ifdef HTTP_TRACE
599  cout << endl << endl;
600 #endif
601 
602  // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
603  if (parser.get_location() != "" &&
604  url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
605  delete stream;
606  return fetch_url(parser.get_location());
607  }
608 
609  stream->set_type(parser.get_object_type());
610  stream->set_version(parser.get_server());
611  stream->set_protocol(parser.get_protocol());
612 
613  return stream;
614 }
615 
616 // Look around for a reasonable place to put a temporary file. Check first
617 // the value of the TMPDIR env var. If that does not yield a path that's
618 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
619 // defined in stdio.h). If both come up empty, then use `./'.
620 
621 // Change this to a version that either returns a string or an open file
622 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
623 // (see open()) to make it more secure. Ideal solution: get deserialize()
624 // methods to read from a stream returned by libcurl, not from a temporary
625 // file. 9/21/07 jhrg Updated to use strings, so other misc changes. 3/22/11
626 static string
627 get_tempfile_template(const string &file_template)
628 {
629  string c;
630 
631  // Windows has one idea of the standard name(s) for a temporary files dir
632 #ifdef WIN32
633  // white list for a WIN32 directory
634  Regex directory("[-a-zA-Z0-9_:\\]*");
635 
636  // If we're OK to use getenv(), try it.
637 #ifdef USE_GETENV
638  c = getenv("TEMP");
639  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
640  goto valid_temp_directory;
641 
642  c= getenv("TMP");
643  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
644  goto valid_temp_directory;
645 #endif // USE_GETENV
646 
647  // The windows default
648  c = "c:\tmp";
649  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
650  goto valid_temp_directory;
651 
652 #else // Unix/Linux/OSX has another...
653  // white list for a directory
654  Regex directory("[-a-zA-Z0-9_/]*");
655 #ifdef USE_GETENV
656  c = getenv("TMPDIR");
657  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
658  goto valid_temp_directory;
659 #endif // USE_GETENV
660 
661  // Unix defines this sometimes - if present, use it.
662 #ifdef P_tmpdir
663  if (access(P_tmpdir, W_OK | R_OK) == 0) {
664  c = P_tmpdir;
665  goto valid_temp_directory;
666  }
667 #endif
668 
669  // The Unix default
670  c = "/tmp";
671  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
672  goto valid_temp_directory;
673 
674 #endif // WIN32
675 
676  // If we found nothing useful, use the current directory
677  c = ".";
678 
679 valid_temp_directory:
680 
681 #ifdef WIN32
682  c += "\\" + file_template;
683 #else
684  c += "/" + file_template;
685 #endif
686 
687  return c;
688 }
689 
708 string
709 get_temp_file(FILE *&stream) throw(InternalErr)
710 {
711  string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
712 
713  vector<char> pathname(dods_temp.length() + 1);
714 
715  strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
716 
717  DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
718 
719  // Open truncated for update. NB: mkstemp() returns a file descriptor.
720 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
721  stream = fopen(_mktemp(&pathname[0]), "w+b");
722 #else
723  // Make sure that temp files are accessible only by the owner.
724  umask(077);
725  stream = fdopen(mkstemp(&pathname[0]), "w+");
726 #endif
727 
728  if (!stream) {
729  throw InternalErr(__FILE__, __LINE__,
730  "Failed to open a temporary file for the data values ("
731  + dods_temp + ")");
732  }
733 
734  dods_temp = &pathname[0];
735  return dods_temp;
736 }
737 
739 void
740 close_temp(FILE *s, const string &name)
741 {
742  int res = fclose(s);
743  if (res)
744  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
745 
746  res = unlink(name.c_str());
747  if (res != 0)
748  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
749 }
750 
772 HTTPResponse *
773 HTTPConnect::caching_fetch_url(const string &url)
774 {
775  DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
776 
777  vector<string> *headers = new vector<string>;
778  string file_name;
779  FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
780  if (!s) {
781  // url not in cache; get it and cache it
782  DBGN(cerr << "no; getting response and caching." << endl);
783  delete headers; headers = 0;
784  time_t now = time(0);
785  HTTPResponse *rs = plain_fetch_url(url);
786  d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
787 
788  return rs;
789  }
790  else { // url in cache
791  DBGN(cerr << "yes... ");
792 
793  if (d_http_cache->is_url_valid(url)) { // url in cache and valid
794  DBGN(cerr << "and it's valid; using cached response." << endl);
795  HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
796  return crs;
797  }
798  else { // url in cache but not valid; validate
799  DBGN(cerr << "but it's not valid; validating... ");
800 
801  d_http_cache->release_cached_response(s); // This closes 's'
802  headers->clear();
803  vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
804  FILE *body = 0;
805  string dods_temp = get_temp_file(body);
806  time_t now = time(0); // When was the request made (now).
807  long http_status;
808 
809  try {
810  http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
811  rewind(body);
812  }
813  catch (Error &e) {
814  close_temp(body, dods_temp);
815  delete headers;
816  throw ;
817  }
818 
819  switch (http_status) {
820  case 200: { // New headers and new body
821  DBGN(cerr << "read a new response; caching." << endl);
822 
823  d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
824  HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
825 
826  return rs;
827  }
828 
829  case 304: { // Just new headers, use cached body
830  DBGN(cerr << "cached response valid; updating." << endl);
831 
832  close_temp(body, dods_temp);
833  d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
834  string file_name;
835  FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
836  HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
837  return crs;
838  }
839 
840  default: { // Oops.
841  close_temp(body, dods_temp);
842  if (http_status >= 400) {
843  delete headers; headers = 0;
844  string msg = "Error while reading the URL: ";
845  msg += url;
846  msg
847  += ".\nThe OPeNDAP server returned the following message:\n";
848  msg += http_status_to_string(http_status);
849  throw Error(msg);
850  }
851  else {
852  delete headers; headers = 0;
853  throw InternalErr(__FILE__, __LINE__,
854  "Bad response from the HTTP server: " + long_to_string(http_status));
855  }
856  }
857  }
858  }
859  }
860 
861  throw InternalErr(__FILE__, __LINE__, "Should never get here");
862 }
863 
875 HTTPResponse *
876 HTTPConnect::plain_fetch_url(const string &url)
877 {
878  DBG(cerr << "Getting URL: " << url << endl);
879  FILE *stream = 0;
880  string dods_temp = get_temp_file(stream);
881  vector<string> *resp_hdrs = new vector<string>;
882 
883  int status = -1;
884  try {
885  status = read_url(url, stream, resp_hdrs); // Throws Error.
886  if (status >= 400) {
887  // delete resp_hdrs; resp_hdrs = 0;
888  string msg = "Error while reading the URL: ";
889  msg += url;
890  msg += ".\nThe OPeNDAP server returned the following message:\n";
891  msg += http_status_to_string(status);
892  throw Error(msg);
893  }
894  }
895 
896  catch (Error &e) {
897  delete resp_hdrs;
898  close_temp(stream, dods_temp);
899  throw;
900  }
901 
902  rewind(stream);
903 
904  return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
905 }
906 
918 void
920 {
921  d_accept_deflate = deflate;
922 
923  if (d_accept_deflate) {
924  if (find(d_request_headers.begin(), d_request_headers.end(),
925  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
926  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
927  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
928  ostream_iterator<string>(cerr, "\n")));
929  }
930  else {
931  vector<string>::iterator i;
932  i = remove_if(d_request_headers.begin(), d_request_headers.end(),
933  bind2nd(equal_to<string>(),
934  string("Accept-Encoding: deflate, gzip, compress")));
935  d_request_headers.erase(i, d_request_headers.end());
936  }
937 }
938 
/** Predicate that is true for strings that begin with a given header
    prefix. Intended for use with std::find_if() over header lines.

    The prefix is stored by value so that the predicate stays valid when it
    is built from a temporary string and then copied or kept. The class no
    longer derives from std::unary_function (deprecated in C++11, removed
    in C++17); nothing here needs its typedefs. */
class HeaderMatch {
    const string d_header;
public:
    HeaderMatch(const string &header) : d_header(header) {}

    /** @return True if \c arg starts with the stored prefix. */
    bool operator()(const string &arg) const
    {
        // A bounded compare tests only the prefix instead of searching the
        // whole of 'arg'.
        return arg.compare(0, d_header.size(), d_header) == 0;
    }
};
946 
955 void
956 HTTPConnect::set_xdap_protocol(int major, int minor)
957 {
958  // Look for, and remove if one exists, an XDAP-Accept header
959  vector<string>::iterator i;
960  i = find_if(d_request_headers.begin(), d_request_headers.end(),
961  HeaderMatch("XDAP-Accept:"));
962  if (i != d_request_headers.end())
963  d_request_headers.erase(i);
964 
965  // Record and add the new header value
966  d_dap_client_protocol_major = major;
967  d_dap_client_protocol_minor = minor;
968  ostringstream xdap_accept;
969  xdap_accept << "XDAP-Accept: " << major << "." << minor;
970 
971  d_request_headers.push_back(xdap_accept.str());
972 
973  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
974  ostream_iterator<string>(cerr, "\n")));
975 }
976 
992 void
993 HTTPConnect::set_credentials(const string &u, const string &p)
994 {
995  if (u.empty())
996  return;
997 
998  // Store the credentials locally.
999  d_username = u;
1000  d_password = p;
1001 
1002  d_upstring = u + ":" + p;
1003 }
1004 
1005 } // namespace libdap
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1230
bool is_cache_enabled() const
Definition: HTTPCache.cc:626
virtual int get_status() const
Definition: Response.h:111
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:614
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1369
void set_credentials(const string &u, const string &p)
Definition: HTTPConnect.cc:993
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:126
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:704
int get_ignore_expires() const
Definition: RCReader.h:151
#define DBGN(x)
Definition: debug.h:59
#define SERVER_ERR_MIN
Definition: HTTPConnect.cc:105
virtual void set_type(ObjectType o)
Definition: Response.h:143
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1461
string get_cookie_jar() const
Definition: RCReader.h:258
ObjectType
The type of object in the stream coming from the data server.
Definition: ObjectType.h:57
#define SERVER_ERR_MAX
Definition: HTTPConnect.cc:106
HTTPResponse * fetch_url(const string &url)
Definition: HTTPConnect.cc:569
virtual void set_version(const string &v)
Definition: Response.h:147
int get_default_expires() const
Definition: RCReader.h:155
#define DBG2(x)
Definition: debug.h:73
A class for software fault reporting.
Definition: InternalErr.h:64
void parse_mime_header(const string &header, string &name, string &value)
Definition: mime_util.cc:899
virtual void set_protocol(const string &p)
Definition: Response.h:151
unsigned int get_max_cached_obj() const
Definition: RCReader.h:147
bool get_deflate() const
Definition: RCReader.h:168
#define DBG(x)
Definition: debug.h:58
int get_max_cache_size() const
Definition: RCReader.h:143
#define CLIENT_ERR_MAX
Definition: HTTPConnect.cc:80
ObjectType get_description_type(const string &value)
Definition: mime_util.cc:340
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1300
void close_temp(FILE *s, const string &name)
Definition: HTTPConnect.cc:740
friend class ParseHeader
Definition: HTTPConnect.h:115
string get_temp_file(FILE *&stream)
Definition: HTTPConnect.cc:709
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1137
void set_accept_deflate(bool defalte)
Definition: HTTPConnect.cc:919
string long_to_string(long val, int base)
Definition: util.cc:773
void set_always_validate(bool validate)
Definition: HTTPCache.cc:822
void set_xdap_protocol(int major, int minor)
Definition: HTTPConnect.cc:956
virtual ~HTTPConnect()
Definition: HTTPConnect.cc:547
int dods_keep_temps
Definition: HTTPConnect.cc:77
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:800
bool get_use_cache() const
Definition: RCReader.h:139
int get_always_validate() const
Definition: RCReader.h:159
virtual vector< string > * get_headers() const
Definition: HTTPResponse.h:124
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1552
#define CVER
Definition: config.h:37
#define CNAME
Definition: config.h:26
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:670
#define CLIENT_ERR_MIN
Definition: HTTPConnect.cc:79
string get_dods_cache_root() const
Definition: RCReader.h:135
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:753
int www_trace
Definition: HTTPConnect.cc:74