libdap Updated for version 3.20.10
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPConnect.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26
27#include "config.h"
28
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
32
33#include <sys/stat.h>
34
35#ifdef WIN32
36#include <io.h>
37#endif
38
39#include <string>
40#include <vector>
41#include <functional>
42#include <algorithm>
43#include <sstream>
44#include <fstream>
45#include <iterator>
46#include <cstdlib>
47#include <cstring>
48#include <cerrno>
49
50//#define DODS_DEBUG2
51//#define HTTP_TRACE
52//#define DODS_DEBUG
53
54#undef USE_GETENV
55
56
57#include "debug.h"
58#include "mime_util.h"
59#include "media_types.h"
60#include "GNURegex.h"
61#include "HTTPCache.h"
62#include "HTTPConnect.h"
63#include "RCReader.h"
64#include "HTTPResponse.h"
65#include "HTTPCacheResponse.h"
66
67using namespace std;
68
69namespace libdap {
70
// Debugging switches. They are deliberately plain globals: they are not
// MT-Safe, but they exist only so they can be flipped from a debugger (gdb,
// ddd), and they are not static because many debuggers cannot access static
// variables. 08/07/02 jhrg

// Non-zero turns on libcurl's verbose mode (for debugging).
int www_trace = 0;

// Non-zero turns on libcurl's VERY verbose mode.
int www_trace_extensive = 0;

// Non-zero keeps the temporary files; useful for debugging.
int dods_keep_temps = 0;
84
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Messages for the HTTP client-error codes, indexed by (status - CLIENT_ERR_MIN).
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
    "Bad Request:",                                     // 400
    "Unauthorized: Contact the server administrator.",  // 401
    "Payment Required.",                                // 402
    "Forbidden: Contact the server administrator.",     // 403
    "Not Found: The data source or server could not be found.\n"
    " Often this means that the OPeNDAP server is missing or needs attention.\n"
    " Please contact the server administrator.",        // 404
    "Method Not Allowed.",                              // 405
    "Not Acceptable.",                                  // 406
    "Proxy Authentication Required.",                   // 407
    "Request Time-out.",                                // 408
    "Conflict.",                                        // 409
    "Gone:.",                                           // 410
    "Length Required.",                                 // 411
    "Precondition Failed.",                             // 412
    "Request Entity Too Large.",                        // 413
    "Request URI Too Large.",                           // 414
    "Unsupported Media Type.",                          // 415
    "Requested Range Not Satisfiable.",                 // 416
    "Expectation Failed."                               // 417
};

#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Messages for the HTTP server-error codes, indexed by (status - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
    "Internal Server Error.",       // 500
    "Not Implemented.",             // 501
    "Bad Gateway.",                 // 502
    "Service Unavailable.",         // 503
    "Gateway Time-out.",            // 504
    "HTTP Version Not Supported."   // 505
};

/** Translate an HTTP status code into a human readable message.

    @param status The HTTP status code.
    @return The table entry for codes in [CLIENT_ERR_MIN, CLIENT_ERR_MAX] or
    [SERVER_ERR_MIN, SERVER_ERR_MAX]; a generic "Unknown Error" message for
    every other value. */
static std::string
http_status_to_string(int status)
{
    if (CLIENT_ERR_MIN <= status && status <= CLIENT_ERR_MAX)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);

    if (SERVER_ERR_MIN <= status && status <= SERVER_ERR_MAX)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);

    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
135
136static ObjectType
137determine_object_type(const string &header_value)
138{
139 // DAP4 Data: application/vnd.opendap.dap4.data
140 // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml
141
142 string::size_type plus = header_value.find('+');
143 string base_type;
144 string type_extension = "";
145 if (plus != string::npos) {
146 base_type= header_value.substr(0, plus);
147 type_extension = header_value.substr(plus+1);
148 }
149 else
150 base_type = header_value;
151
152 if (base_type == DMR_Content_Type
153 || (base_type.find("application/") != string::npos
154 && base_type.find("dap4.dataset-metadata") != string::npos)) {
155 if (type_extension == "xml")
156 return dap4_dmr;
157 else
158 return unknown_type;
159 }
160 else if (base_type == DAP4_DATA_Content_Type
161 || (base_type.find("application/") != string::npos
162 && base_type.find("dap4.data") != string::npos)) {
163 return dap4_data;
164 }
165 else if (header_value.find("text/html") != string::npos) {
166 return web_error;
167 }
168 else
169 return unknown_type;
170}
171
176class ParseHeader : public unary_function<const string &, void>
177{
178 ObjectType type; // What type of object is in the stream?
179 string server; // Server's version string.
180 string protocol; // Server's protocol version.
181 string location; // Url returned by server
182
183public:
184 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
185 { }
186
187 void operator()(const string &line)
188 {
189 string name, value;
190 parse_mime_header(line, name, value);
191
192 DBG2(cerr << name << ": " << value << endl);
193
194 // Content-Type is used to determine the content of DAP4 responses, but allow the
195 // Content-Description header to override CT o preserve operation with DAP2 servers.
196 // jhrg 11/12/13
197 if (type == unknown_type && name == "content-type") {
198 type = determine_object_type(value); // see above
199 }
200 if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
201 type = get_description_type(value); // defined in mime_util.cc
202 }
203 // The second test (== "dods/0.0") tests if xopendap-server has already
204 // been seen. If so, use that header in preference to the old
205 // XDODS-Server header. jhrg 2/7/06
206 else if (name == "xdods-server" && server == "dods/0.0") {
207 server = value;
208 }
209 else if (name == "xopendap-server") {
210 server = value;
211 }
212 else if (name == "xdap") {
213 protocol = value;
214 }
215 else if (server == "dods/0.0" && name == "server") {
216 server = value;
217 }
218 else if (name == "location") {
219 location = value;
220 }
221 }
222
223 ObjectType get_object_type()
224 {
225 return type;
226 }
227
228 string get_server()
229 {
230 return server;
231 }
232
233 string get_protocol()
234 {
235 return protocol;
236 }
237
238 string get_location() {
239 return location;
240 }
241};
242
258static size_t
259save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
260{
261 DBG2(cerr << "Inside the header parser." << endl);
262 vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);
263
264 // Grab the header, minus the trailing newline. Or \r\n pair.
265 string complete_line;
266 if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r')
267 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2));
268 else
269 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1));
270
271 // Store all non-empty headers that are not HTTP status codes
272 if (complete_line != "" && complete_line.find("HTTP") == string::npos) {
273 DBG(cerr << "Header line: " << complete_line << endl);
274 hdrs->push_back(complete_line);
275 }
276
277 return size * nmemb;
278}
279
281static int
282curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
283{
284 string message(msg, size);
285
286 switch (info) {
287 case CURLINFO_TEXT:
288 cerr << "Text: " << message; break;
289 case CURLINFO_HEADER_IN:
290 cerr << "Header in: " << message; break;
291 case CURLINFO_HEADER_OUT:
292 cerr << "Header out: " << message; break;
293 case CURLINFO_DATA_IN:
294 if (www_trace_extensive)
295 cerr << "Data in: " << message; break;
296 case CURLINFO_DATA_OUT:
297 if (www_trace_extensive)
298 cerr << "Data out: " << message; break;
299 case CURLINFO_END:
300 cerr << "End: " << message; break;
301#ifdef CURLINFO_SSL_DATA_IN
302 case CURLINFO_SSL_DATA_IN:
303 cerr << "SSL Data in: " << message; break;
304#endif
305#ifdef CURLINFO_SSL_DATA_OUT
306 case CURLINFO_SSL_DATA_OUT:
307 cerr << "SSL Data out: " << message; break;
308#endif
309 default:
310 if (www_trace_extensive)
311 cerr << "Curl info: " << message; break;
312 }
313 return 0;
314}
315
319void
320HTTPConnect::www_lib_init()
321{
322 curl_global_init(CURL_GLOBAL_DEFAULT);
323
324 d_curl = curl_easy_init();
325 if (!d_curl)
326 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
327
328 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
329
330 curl_easy_setopt(d_curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); // enables TLSv1.2 / TLSv1.3 version only
331
332 // Now set options that will remain constant for the duration of this
333 // CURL object.
334
335 // Set the proxy host.
336 if (!d_rcr->get_proxy_server_host().empty()) {
337 DBG(cerr << "Setting up a proxy server." << endl);
338 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
339 << endl);
340 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
341 << endl);
342 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
343 << endl);
344 curl_easy_setopt(d_curl, CURLOPT_PROXY,
345 d_rcr->get_proxy_server_host().c_str());
346 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
347 d_rcr->get_proxy_server_port());
348
349 // As of 4/21/08 only NTLM, Digest and Basic work.
350#ifdef CURLOPT_PROXYAUTH
351 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
352#endif
353
354 // Password might not be required. 06/21/04 jhrg
355 if (!d_rcr->get_proxy_server_userpw().empty())
356 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
357 d_rcr->get_proxy_server_userpw().c_str());
358 }
359
360 // We have to set FailOnError to false for any of the non-Basic
361 // authentication schemes to work. 07/28/03 jhrg
362 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
363
364 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
365 // choosing the the 'safest' one supported by the server.
366 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
367 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
368
369 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
370 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
371 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
372 // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
373 // param of save_raw_http_headers to a vector<string> object.
374
375 // Follow 302 (redirect) responses
376 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
377 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
378
379 // If the user turns off SSL validation...
380 if (d_rcr->get_validate_ssl() == 0) {
381 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
382 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
383 }
384
385 // Set libcurl to use netrc to access data behind URS auth.
386 // libcurl will use the provided pathname for the ~/.netrc info. 08/23/19 kln
387 curl_easy_setopt(d_curl, CURLOPT_NETRC, 1);
388
389 // Look to see if cookies are turned on in the .dodsrc file. If so,
390 // activate here. We honor 'session cookies' (cookies without an
391 // expiration date) here so that session-based SSO systems will work as
392 // expected.
393 if (!d_cookie_jar.empty()) {
394 DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
395 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
396 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
397 }
398
399 if (www_trace) {
400 cerr << "Curl version: " << curl_version() << endl;
401 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
402 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
403 }
404}
405
409class BuildHeaders : public unary_function<const string &, void>
410{
411 struct curl_slist *d_cl;
412
413public:
414 BuildHeaders() : d_cl(0)
415 {}
416
417 void operator()(const string &header)
418 {
419 DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
420 << endl);
421 d_cl = curl_slist_append(d_cl, header.c_str());
422 }
423
424 struct curl_slist *get_headers()
425 {
426 return d_cl;
427 }
428};
429
444long
445HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers)
446{
447 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
448
449#ifdef WIN32
450 // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
451 // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
452 // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
453 // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
454 // this issue is that one should not pass a FILE * to a windows DLL. Close
455 // inspection of libcurl yields that their default write function when using
456 // the CURLOPT_WRITEDATA is just "fwrite".
457 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
458 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
459#else
460 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
461#endif
462
463 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
464 ostream_iterator<string>(cerr, "\n")));
465
466 BuildHeaders req_hdrs;
467 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
468 req_hdrs);
469 if (headers)
470 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
471
472 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
473
474 // Turn off the proxy for this URL?
475 bool temporary_proxy = false;
476 if ((temporary_proxy = url_uses_no_proxy_for(url))) {
477 DBG(cerr << "Suppress proxy for url: " << url << endl);
478 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
479 }
480
481 string::size_type at_sign = url.find('@');
482 // Assume username:password present *and* assume it's an HTTP URL; it *is*
483 // HTTPConnect, after all. 7 is position after "http://"; the second arg
484 // to substr() is the sub string length.
485 if (at_sign != url.npos)
486 d_upstring = url.substr(7, at_sign - 7);
487
488 if (!d_upstring.empty())
489 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
490
491 // Pass save_raw_http_headers() a pointer to the vector<string> where the
492 // response headers may be stored. Callers can use the resp_hdrs
493 // value/result parameter to get the raw response header information .
494 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
495
496 // This is the call that causes curl to go and get the remote resource and "write it down"
497 // utilizing the configuration state that has been previously conditioned by various perturbations
498 // of calls to curl_easy_setopt().
499 CURLcode res = curl_easy_perform(d_curl);
500
501 // Free the header list and null the value in d_curl.
502 curl_slist_free_all(req_hdrs.get_headers());
503 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
504
505 // Reset the proxy?
506 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
507 curl_easy_setopt(d_curl, CURLOPT_PROXY,
508 d_rcr->get_proxy_server_host().c_str());
509
510 if (res != 0)
511 throw Error(d_error_buffer);
512
513 long status;
514 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
515 if (res != 0)
516 throw Error(d_error_buffer);
517
518 char *ct_ptr = 0;
519 res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
520 if (res == CURLE_OK && ct_ptr)
521 d_content_type = ct_ptr;
522 else
523 d_content_type = "";
524
525 return status;
526}
527
531bool
532HTTPConnect::url_uses_proxy_for(const string &url)
533{
534 if (d_rcr->is_proxy_for_used()) {
535 // NB: This could be improved by moving the Regex instance into
536 // the RCReader class, but the proxy stuff is all deprecated.
537 // jhrg 12/1/21
538 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
539 int index = 0, matchlen;
540 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
541 }
542
543 return false;
544}
545
549bool
550HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
551{
552 return d_rcr->is_no_proxy_for_used()
553 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
554}
555
556// Public methods. Mostly...
557
564HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""),
565 d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
566
567{
568 d_accept_deflate = rcr->get_deflate();
569 d_rcr = rcr;
570
571 // Load in the default headers to send with a request. The empty Pragma
572 // headers overrides libcurl's default Pragma: no-cache header (which
573 // will disable caching by Squid, et c.). The User-Agent header helps
574 // make server logs more readable. 05/05/03 jhrg
575 d_request_headers.push_back(string("Pragma:"));
576 string user_agent = string("User-Agent: ") + string(CNAME)
577 + string("/") + string(CVER);
578 d_request_headers.push_back(user_agent);
579 if (d_accept_deflate)
580 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
581
582 // HTTPCache::instance returns a valid ptr or 0.
583 if (d_rcr->get_use_cache())
584 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
585 else
586 d_http_cache = 0;
587
588 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
589 << ")" << endl);
590
591 if (d_http_cache) {
592 d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
593 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
594 d_http_cache->set_max_size(d_rcr->get_max_cache_size());
595 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
596 d_http_cache->set_default_expiration(d_rcr->get_default_expires());
597 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
598 }
599
600 d_cookie_jar = rcr->get_cookie_jar();
601
602 www_lib_init(); // This may throw either Error or InternalErr
603}
604
605HTTPConnect::~HTTPConnect()
606{
607 DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
608
609 curl_easy_cleanup(d_curl);
610
611 DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
612}
613
/** Predicate: does a header line start with a given prefix?

    The prefix is stored by value. Every use in this file constructs the
    predicate from a temporary string, so holding a reference would dangle
    as soon as the predicate outlived the constructing expression. It no
    longer derives from std::unary_function (removed in C++17). */
class HeaderMatch {
    std::string d_header;   // The prefix to look for, e.g. "Content-Type:".
public:
    explicit HeaderMatch(const std::string &header) : d_header(header) {}

    /** @param arg A header line.
        @return True if 'arg' begins with the prefix (case sensitive). */
    bool operator()(const std::string &arg) const
    {
        // Compare only the leading characters instead of searching the whole line.
        return arg.compare(0, d_header.size(), d_header) == 0;
    }
};
621
634HTTPResponse *
635HTTPConnect::fetch_url(const string &url)
636{
637#ifdef HTTP_TRACE
638 cout << "GET " << url << " HTTP/1.0" << endl;
639#endif
640
641 HTTPResponse *stream;
642
643 if (/*d_http_cache && d_http_cache->*/is_cache_enabled()) {
644 stream = caching_fetch_url(url);
645 }
646 else {
647 stream = plain_fetch_url(url);
648 }
649
650#ifdef HTTP_TRACE
651 stringstream ss;
652 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
653 for (size_t i = 0; i < stream->get_headers()->size(); i++) {
654 ss << stream->get_headers()->at(i) << endl;
655 }
656 cout << ss.str();
657#endif
658
659 ParseHeader parser;
660
661 // An apparent quirk of libcurl is that it does not pass the Content-type
662 // header to the callback used to save them, but check and add it from the
663 // saved state variable only if it's not there (without this a test failed
664 // in HTTPCacheTest). jhrg 11/12/13
665 if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
666 HeaderMatch("Content-Type:")) == stream->get_headers()->end())
667 stream->get_headers()->push_back("Content-Type: " + d_content_type);
668
669 parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
670
671#ifdef HTTP_TRACE
672 cout << endl << endl;
673#endif
674
675 // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
676 if (parser.get_location() != "" &&
677 url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
678 delete stream;
679 return fetch_url(parser.get_location());
680 }
681
682 stream->set_type(parser.get_object_type()); // uses the value of content-description
683
684 stream->set_version(parser.get_server());
685 stream->set_protocol(parser.get_protocol());
686
687 if (d_use_cpp_streams) {
688 stream->transform_to_cpp();
689 }
690
691 return stream;
692}
693
694// Look around for a reasonable place to put a temporary file. Check first
695// the value of the TMPDIR env var. If that does not yeild a path that's
696// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
697// defined in stdio.h. If both come up empty, then use `./'.
698
699// Change this to a version that either returns a string or an open file
700// descriptor. Use information from https://buildsecurityin.us-cert.gov/
701// (see open()) to make it more secure. Ideal solution: get deserialize()
702// methods to read from a stream returned by libcurl, not from a temporary
703// file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
704static string
705get_tempfile_template(const string &file_template)
706{
707 string c;
708
709 // Windows has one idea of the standard name(s) for a temporary files dir
710#ifdef WIN32
711 // white list for a WIN32 directory
712 Regex directory("[-a-zA-Z0-9_:\\]*");
713
714 // If we're OK to use getenv(), try it.
715#ifdef USE_GETENV
716 c = getenv("TEMP");
717 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
718 goto valid_temp_directory;
719
720 c= getenv("TMP");
721 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
722 goto valid_temp_directory;
723#endif // USE_GETENV
724
725 // The windows default
726 c = "c:\tmp";
727 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
728 goto valid_temp_directory;
729
730#else // Unix/Linux/OSX has another...
731 // white list for a directory
732 const Regex directory("[-a-zA-Z0-9_/]*");
733#ifdef USE_GETENV
734 c = getenv("TMPDIR");
735 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
736 goto valid_temp_directory;
737#endif // USE_GETENV
738
739 // Unix defines this sometimes - if present, use it.
740#ifdef P_tmpdir
741 if (access(P_tmpdir, W_OK | R_OK) == 0) {
742 c = P_tmpdir;
743 goto valid_temp_directory;
744 }
745#endif
746
747 // The Unix default
748 c = "/tmp";
749 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
750 goto valid_temp_directory;
751
752#endif // WIN32
753
754 // If we found nothing useful, use the current directory
755 c = ".";
756
757valid_temp_directory:
758
759#ifdef WIN32
760 c += "\\" + file_template;
761#else
762 c += "/" + file_template;
763#endif
764
765 return c;
766}
767
786string
787get_temp_file(FILE *&stream) throw(Error)
788{
789 string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
790
791 vector<char> pathname(dods_temp.length() + 1);
792
793 strncpy(pathname.data(), dods_temp.c_str(), dods_temp.length());
794
795 DBG(cerr << "pathanme: " << pathname.data() << " (" << dods_temp.length() + 1 << ")" << endl);
796
797 // Open truncated for update. NB: mkstemp() returns a file descriptor.
798#if defined(WIN32) || defined(TEST_WIN32_TEMPS)
799 stream = fopen(_mktemp(pathname.data()), "w+b");
800#else
801 // Make sure that temp files are accessible only by the owner.
802 int mask = umask(077);
803 if (mask < 0)
804 throw Error("Could not set the file creation mask: " + string(strerror(errno)));
805 int fd = mkstemp(pathname.data());
806 if (fd < 0)
807 throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
808
809 stream = fdopen(fd, "w+");
810 umask(mask);
811#endif
812
813 if (!stream)
814 throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
815
816 dods_temp = pathname.data();
817 return dods_temp;
818}
819
820
826void
827close_temp(FILE *s, const string &name)
828{
829 int res = fclose(s);
830 if (res)
831 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
832
833 res = unlink(name.c_str());
834 if (res != 0)
835 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
836}
837
859HTTPResponse *
860HTTPConnect::caching_fetch_url(const string &url)
861{
862 DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
863
864 vector<string> *headers = new vector<string>;
865 string file_name;
866 FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
867 if (!s) {
868 // url not in cache; get it and cache it
869 DBGN(cerr << "no; getting response and caching." << endl);
870 delete headers; headers = 0;
871 time_t now = time(0);
872 HTTPResponse *rs = plain_fetch_url(url);
873 d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
874
875 return rs;
876 }
877 else { // url in cache
878 DBGN(cerr << "yes... ");
879
880 if (d_http_cache->is_url_valid(url)) { // url in cache and valid
881 DBGN(cerr << "and it's valid; using cached response." << endl);
882 HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
883 return crs;
884 }
885 else { // url in cache but not valid; validate
886 DBGN(cerr << "but it's not valid; validating... ");
887
888 d_http_cache->release_cached_response(s); // This closes 's'
889 headers->clear();
890 vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
891 FILE *body = 0;
892 string dods_temp = get_temp_file(body);
893 time_t now = time(0); // When was the request made (now).
894 long http_status;
895
896 try {
897 http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
898 rewind(body);
899 }
900 catch (Error &e) {
901 close_temp(body, dods_temp);
902 delete headers;
903 throw ;
904 }
905
906 switch (http_status) {
907 case 200: { // New headers and new body
908 DBGN(cerr << "read a new response; caching." << endl);
909
910 d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
911 HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
912
913 return rs;
914 }
915
916 case 304: { // Just new headers, use cached body
917 DBGN(cerr << "cached response valid; updating." << endl);
918
919 close_temp(body, dods_temp);
920 d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
921 string file_name;
922 FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
923 HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
924 return crs;
925 }
926
927 default: { // Oops.
928 close_temp(body, dods_temp);
929 if (http_status >= 400) {
930 delete headers; headers = 0;
931 string msg = "Error while reading the URL: ";
932 msg += url;
933 msg
934 += ".\nThe OPeNDAP server returned the following message:\n";
935 msg += http_status_to_string(http_status);
936 throw Error(msg);
937 }
938 else {
939 delete headers; headers = 0;
940 throw InternalErr(__FILE__, __LINE__,
941 "Bad response from the HTTP server: " + long_to_string(http_status));
942 }
943 }
944 }
945 }
946 }
947
948 throw InternalErr(__FILE__, __LINE__, "Should never get here");
949}
950
962HTTPResponse *
963HTTPConnect::plain_fetch_url(const string &url)
964{
965 DBG(cerr << "Getting URL: " << url << endl);
966 FILE *stream = 0;
967 string dods_temp = get_temp_file(stream);
968 vector<string> *resp_hdrs = new vector<string>;
969
970 int status = -1;
971 try {
972 status = read_url(url, stream, resp_hdrs); // Throws Error.
973 if (status >= 400) {
974 // delete resp_hdrs; resp_hdrs = 0;
975 string msg = "Error while reading the URL: ";
976 msg += url;
977 msg += ".\nThe OPeNDAP server returned the following message:\n";
978 msg += http_status_to_string(status);
979 throw Error(msg);
980 }
981 }
982
983 catch (Error &e) {
984 delete resp_hdrs;
985 close_temp(stream, dods_temp);
986 throw;
987 }
988
989#if 0
990 if (d_use_cpp_streams) {
991 fclose(stream);
992 fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
993 return new HTTPResponse(in, status, resp_hdrs, dods_temp);
994 }
995 else {
996#endif
997 rewind(stream);
998 return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
999#if 0
1000}
1001#endif
1002}
1003
1015void
1017{
1018 d_accept_deflate = deflate;
1019
1020 if (d_accept_deflate) {
1021 if (find(d_request_headers.begin(), d_request_headers.end(),
1022 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
1023 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
1024 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1025 ostream_iterator<string>(cerr, "\n")));
1026 }
1027 else {
1028 vector<string>::iterator i;
1029 i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1030 bind2nd(equal_to<string>(),
1031 string("Accept-Encoding: deflate, gzip, compress")));
1032 d_request_headers.erase(i, d_request_headers.end());
1033 }
1034}
1035
1044void
1046{
1047 // Look for, and remove if one exists, an XDAP-Accept header
1048 vector<string>::iterator i;
1049 i = find_if(d_request_headers.begin(), d_request_headers.end(),
1050 HeaderMatch("XDAP-Accept:"));
1051 if (i != d_request_headers.end())
1052 d_request_headers.erase(i);
1053
1054 // Record and add the new header value
1055 d_dap_client_protocol_major = major;
1056 d_dap_client_protocol_minor = minor;
1057 ostringstream xdap_accept;
1058 xdap_accept << "XDAP-Accept: " << major << "." << minor;
1059
1060 d_request_headers.push_back(xdap_accept.str());
1061
1062 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1063 ostream_iterator<string>(cerr, "\n")));
1064}
1065
1081void
1082HTTPConnect::set_credentials(const string &u, const string &p)
1083{
1084 if (u.empty())
1085 return;
1086
1087 // Store the credentials locally.
1088 d_username = u;
1089 d_password = p;
1090
1091 d_upstring = u + ":" + p;
1092}
1093
1094} // namespace libdap
A class for error processing.
Definition Error.h:94
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition HTTPCache.cc:129
void set_expire_ignored(bool mode)
Definition HTTPCache.cc:690
void set_default_expiration(int exp_time)
Definition HTTPCache.cc:819
void release_cached_response(FILE *response)
vector< string > get_conditional_request_headers(const string &url)
void set_cache_enabled(bool mode)
Definition HTTPCache.cc:635
void set_max_entry_size(unsigned long size)
Definition HTTPCache.cc:772
bool is_url_valid(const string &url)
void set_always_validate(bool validate)
Definition HTTPCache.cc:841
void update_response(const string &url, time_t request_time, const vector< string > &headers)
void set_max_size(unsigned long size)
Definition HTTPCache.cc:724
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
void set_accept_deflate(bool defalte)
HTTPResponse * fetch_url(const string &url)
void set_credentials(const string &u, const string &p)
void set_xdap_protocol(int major, int minor)
A class for software fault reporting.
Definition InternalErr.h:65
string get_proxy_server_host() const
Get the proxy host.
Definition RCReader.h:181
int get_proxy_server_port() const
Get the proxy port.
Definition RCReader.h:186
string get_proxy_server_userpw() const
Get the proxy username and password.
Definition RCReader.h:191
string get_proxy_for_regexp() const
Definition RCReader.h:215
bool is_proxy_for_used()
Definition RCReader.h:210
Regular expression matching.
Definition GNURegex.h:57
top level DAP object to house generic methods
ObjectType get_description_type(const string &value)
Definition mime_util.cc:337
void parse_mime_header(const string &header, string &name, string &value)
Definition mime_util.cc:910
string get_temp_file(FILE *&stream)
void close_temp(FILE *s, const string &name)