bes  Updated for version 3.20.6
gateway_module/curl_utils.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of gateway_module, A C++ module that can be loaded in to
4 // the OPeNDAP Back-End Server (BES) and is able to handle remote requests.
5 
6 // Copyright (c) 2013 OPeNDAP, Inc.
7 // Author: Nathan Potter <ndp@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
#include <unistd.h>

#include <algorithm> // std::for_each
#include <cerrno>    // errno, EINTR

#include <GNURegex.h>

#include "util.h"
#include "BESDebug.h"
#include "GatewayUtils.h"

#include "curl_utils.h"
35 
36 using namespace std;
37 
38 namespace gateway {
39 
// Debug switch for libcurl tracing. Set this to 1 to turn on libcurl's
// verbose mode (for debugging): when it is non-zero, init() enables
// CURLOPT_VERBOSE on the handle it creates and installs curl_debug() as the
// handle's debug callback.
int curl_trace = 0;
42 
43 
44 
// Inclusive range of 4xx status codes that have an entry in
// http_client_errors.
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417

// Human-readable text for HTTP client errors, indexed by
// (status - CLIENT_ERR_MIN).
const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
    /* 400 */ "Bad Request:",
    /* 401 */ "Unauthorized: Contact the server administrator.",
    /* 402 */ "Payment Required.",
    /* 403 */ "Forbidden: Contact the server administrator.",
    /* 404 */ "Not Found: The data source or server could not be found.\n"
              "Often this means that the OPeNDAP server is missing or needs attention.\n"
              "Please contact the server administrator.",
    /* 405 */ "Method Not Allowed.",
    /* 406 */ "Not Acceptable.",
    /* 407 */ "Proxy Authentication Required.",
    /* 408 */ "Request Time-out.",
    /* 409 */ "Conflict.",
    /* 410 */ "Gone:.",
    /* 411 */ "Length Required.",
    /* 412 */ "Precondition Failed.",
    /* 413 */ "Request Entity Too Large.",
    /* 414 */ "Request URI Too Large.",
    /* 415 */ "Unsupported Media Type.",
    /* 416 */ "Requested Range Not Satisfiable.",
    /* 417 */ "Expectation Failed."
};

// Inclusive range of 5xx status codes that have an entry in
// http_server_errors.
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505

// Human-readable text for HTTP server errors, indexed by
// (status - SERVER_ERR_MIN).
const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
    /* 500 */ "Internal Server Error.",
    /* 501 */ "Not Implemented.",
    /* 502 */ "Bad Gateway.",
    /* 503 */ "Service Unavailable.",
    /* 504 */ "Gateway Time-out.",
    /* 505 */ "HTTP Version Not Supported."
};

/**
 * Translate an HTTP status code into a descriptive message.
 *
 * @param status The HTTP status code.
 * @return The table entry for codes in [CLIENT_ERR_MIN, CLIENT_ERR_MAX] or
 * [SERVER_ERR_MIN, SERVER_ERR_MAX]; a generic "Unknown Error" message for
 * every other value.
 */
std::string http_status_to_string(int status)
{
    const bool is_client_error = (CLIENT_ERR_MIN <= status) && (status <= CLIENT_ERR_MAX);
    if (is_client_error) {
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);
    }

    const bool is_server_error = (SERVER_ERR_MIN <= status) && (status <= SERVER_ERR_MAX);
    if (is_server_error) {
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);
    }

    // Anything outside the two tables gets the catch-all text.
    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
95 
96 static string getCurlAuthTypeName(const int authType){
97 
98  string authTypeString;
99  int match;
100 
101  match = authType & CURLAUTH_BASIC;
102  if(match){
103  authTypeString += "CURLAUTH_BASIC";
104  }
105 
106  match = authType & CURLAUTH_DIGEST;
107  if(match){
108  if(!authTypeString.empty())
109  authTypeString += " ";
110  authTypeString += "CURLAUTH_DIGEST";
111  }
112 
113  match = authType & CURLAUTH_DIGEST_IE;
114  if(match){
115  if(!authTypeString.empty())
116  authTypeString += " ";
117  authTypeString += "CURLAUTH_DIGEST_IE";
118  }
119 
120  match = authType & CURLAUTH_GSSNEGOTIATE;
121  if(match){
122  if(!authTypeString.empty())
123  authTypeString += " ";
124  authTypeString += "CURLAUTH_GSSNEGOTIATE";
125  }
126 
127  match = authType & CURLAUTH_NTLM;
128  if(match){
129  if(!authTypeString.empty())
130  authTypeString += " ";
131  authTypeString += "CURLAUTH_NTLM";
132  }
133 
134 #if 0
135  match = authType & CURLAUTH_ANY;
136  if(match){
137  if(!authTypeString.empty())
138  authTypeString += " ";
139  authTypeString += "CURLAUTH_ANY";
140  }
141 
142 
143  match = authType & CURLAUTH_ANY;
144  if(match){
145  if(!authTypeString.empty())
146  authTypeString += " ";
147  authTypeString += "CURLAUTH_ANYSAFE";
148  }
149 
150 
151  match = authType & CURLAUTH_ANY;
152  if(match){
153  if(!authTypeString.empty())
154  authTypeString += " ";
155  authTypeString += "CURLAUTH_ONLY";
156  }
157 #endif
158 
159  return authTypeString;
160 }
161 
162 
167 static size_t writeToOpenfileDescriptor( char *data, size_t /* size */, size_t nmemb, void *userdata){
168 
169  int *fd = (int *) userdata;
170 
171  BESDEBUG("curl", "curl_utils::writeToOpenfileDescriptor() - Bytes received " << libdap::long_to_string(nmemb) << endl);
172  int wrote = write(*fd, data, nmemb);
173  BESDEBUG("curl", "curl_utils::writeToOpenfileDescriptor() - Bytes written " << libdap::long_to_string(wrote) << endl);
174 
175  return wrote;
176 }
177 
178 
202 static size_t save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
203 {
204  BESDEBUG("curl", "curl_utils::save_raw_http_headers() - Inside the header parser." << endl);
205  vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);
206 
207  // Grab the header, minus the trailing newline. Or \r\n pair.
208  string complete_line;
209  if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r')
210  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2));
211  else
212  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1));
213 
214  // Store all non-empty headers that are not HTTP status codes
215  if (complete_line != "" && complete_line.find("HTTP") == string::npos) {
216  BESDEBUG("curl", "curl_utils::save_raw_http_headers() - Header line: " << complete_line << endl);
217  hdrs->push_back(complete_line);
218  }
219 
220  return size * nmemb;
221 }
222 
223 
224 
225 
226 
228 static int curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
229 {
230  string message(msg, size);
231 
232  switch (info) {
233  case CURLINFO_TEXT:
234  BESDEBUG("curl", "curl_utils::curl_debug() - Text: " << message << endl ); break;
235  case CURLINFO_HEADER_IN:
236  BESDEBUG("curl", "curl_utils::curl_debug() - Header in: " << message << endl ); break;
237  case CURLINFO_HEADER_OUT:
238  BESDEBUG("curl", "curl_utils::curl_debug() - Header out: " << endl << message << endl ); break;
239  case CURLINFO_DATA_IN:
240  BESDEBUG("curl", "curl_utils::curl_debug() - Data in: " << message << endl ); break;
241  case CURLINFO_DATA_OUT:
242  BESDEBUG("curl", "curl_utils::curl_debug() - Data out: " << message << endl ); break;
243  case CURLINFO_END:
244  BESDEBUG("curl", "curl_utils::curl_debug() - End: " << message << endl ); break;
245 #ifdef CURLINFO_SSL_DATA_IN
246  case CURLINFO_SSL_DATA_IN:
247  BESDEBUG("curl", "curl_utils::curl_debug() - SSL Data in: " << message << endl ); break;
248 #endif
249 #ifdef CURLINFO_SSL_DATA_OUT
250  case CURLINFO_SSL_DATA_OUT:
251  BESDEBUG("curl", "curl_utils::curl_debug() - SSL Data out: " << message << endl ); break;
252 #endif
253  default:
254  BESDEBUG("curl", "curl_utils::curl_debug() - Curl info: " << message << endl ); break;
255  }
256  return 0;
257 }
258 
259 
260 
261 
262 
263 
264 
267 class BuildHeaders : public std::unary_function<const string &, void>
268 {
269  struct curl_slist *d_cl;
270 
271 public:
272  BuildHeaders() : d_cl(0)
273  {}
274 
275  void operator()(const string &header)
276  {
277  BESDEBUG("curl", "BuildHeaders::operator() - Adding '" << header.c_str() << "' to the header list." << endl);
278  d_cl = curl_slist_append(d_cl, header.c_str());
279  }
280 
281  struct curl_slist *get_headers()
282  {
283  return d_cl;
284  }
285 };
286 
287 
288 
289 
290 
291 
292 
306 bool configureProxy(CURL *curl, const string &url) {
307  BESDEBUG( "curl", "curl_utils::configureProxy() - BEGIN." << endl);
308 
309  bool using_proxy = false;
310 
311  // I pulled this because I could never find where it was applied
312  // to the curl state in HTTPConnect
313  //string proxyProtocol = GatewayUtils::ProxyProtocol;
314 
315  string proxyHost = gateway::GatewayUtils::ProxyHost;
316  int proxyPort = gateway::GatewayUtils::ProxyPort;
317  string proxyPassword = gateway::GatewayUtils::ProxyPassword;
318  string proxyUser = gateway::GatewayUtils::ProxyUser;
319  string proxyUserPW = gateway::GatewayUtils::ProxyUserPW;
320  int proxyAuthType = gateway::GatewayUtils::ProxyAuthType;
321 
322  if (!proxyHost.empty()) {
323  using_proxy = true;
324  if(proxyPort==0)
325  proxyPort = 8080;
326 
327  // Apparently we don't need this...
328  //if(proxyProtocol.empty())
329  // proxyProtocol = "http";
330 
331  }
332  if (using_proxy) {
333  BESDEBUG( "curl", "curl_utils::configureProxy() - Found proxy configuration." << endl);
334 
335  // Don't set up the proxy server for URLs that match the 'NoProxy'
336  // regex set in the gateway.conf file.
337 
338  // Don't create the regex if the string is empty
339  if (!GatewayUtils::NoProxyRegex.empty()) {
340  BESDEBUG( "curl", "curl_utils::configureProxy() - Found NoProxyRegex." << endl);
341  libdap::Regex r(GatewayUtils::NoProxyRegex.c_str());
342  if (r.match(url.c_str(), url.length()) != -1) {
343  BESDEBUG( "curl", "curl_utils::configureProxy() - Found NoProxy match. Regex: " << GatewayUtils::NoProxyRegex << "; Url: " << url << endl);
344  using_proxy = false;
345  }
346  }
347 
348  if (using_proxy) {
349 
350  BESDEBUG("curl", "curl_utils::configureProxy() - Setting up a proxy server." << endl);
351  BESDEBUG("curl", "curl_utils::configureProxy() - Proxy host: " << proxyHost << endl);
352  BESDEBUG("curl", "curl_utils::configureProxy() - Proxy port: " << proxyPort << endl);
353 
354  curl_easy_setopt(curl, CURLOPT_PROXY, proxyHost.data());
355  curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxyPort);
356 
357 // #ifdef CURLOPT_PROXYAUTH
358 
359  // oddly "#ifdef CURLOPT_PROXYAUTH" doesn't work - even though CURLOPT_PROXYAUTH is defined and valued at 111 it
360  // fails the test. Eclipse hover over the CURLOPT_PROXYAUTH symbol shows: "CINIT(PROXYAUTH, LONG, 111)",
361  // for what that's worth
362 
363  // According to http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTPROXYAUTH As of 4/21/08 only NTLM, Digest and Basic work.
364 
365 #if 0
366  BESDEBUG("curl", "curl_utils::configureProxy() - CURLOPT_PROXYAUTH = " << CURLOPT_PROXYAUTH << endl);
367  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_BASIC = " << CURLAUTH_BASIC << endl);
368  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_DIGEST = " << CURLAUTH_DIGEST << endl);
369  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_DIGEST_IE = " << CURLAUTH_DIGEST_IE << endl);
370  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_GSSNEGOTIATE = " << CURLAUTH_GSSNEGOTIATE << endl);
371  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_NTLM = " << CURLAUTH_NTLM << endl);
372  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_ANY = " << CURLAUTH_ANY << endl);
373  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_ANYSAFE = " << CURLAUTH_ANYSAFE << endl);
374  BESDEBUG("curl", "curl_utils::configureProxy() - CURLAUTH_ONLY = " << CURLAUTH_ONLY << endl);
375  BESDEBUG("curl", "curl_utils::configureProxy() - Using CURLOPT_PROXYAUTH = " << proxyAuthType << endl);
376 #endif
377 
378  BESDEBUG("curl", "curl_utils::configureProxy() - Using CURLOPT_PROXYAUTH = " << getCurlAuthTypeName(proxyAuthType) << endl);
379  curl_easy_setopt(curl, CURLOPT_PROXYAUTH, proxyAuthType);
380 // #endif
381 
382 
383 
384  if (!proxyUser.empty()){
385  curl_easy_setopt(curl, CURLOPT_PROXYUSERNAME, proxyUser.data());
386  BESDEBUG("curl", "curl_utils::configureProxy() - CURLOPT_PROXYUSER : " << proxyUser << endl);
387 
388  if (!proxyPassword.empty()){
389  curl_easy_setopt(curl, CURLOPT_PROXYPASSWORD, proxyPassword.data());
390  BESDEBUG("curl", "curl_utils::configureProxy() - CURLOPT_PROXYPASSWORD: " << proxyPassword << endl);
391  }
392  }
393  else if (!proxyUserPW.empty()){
394  BESDEBUG("curl",
395  "curl_utils::configureProxy() - CURLOPT_PROXYUSERPWD : " << proxyUserPW << endl);
396  curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, proxyUserPW.data());
397  }
398 
399  }
400  }
401  BESDEBUG( "curl", "curl_utils::configureProxy() - END." << endl);
402 
403  return using_proxy;
404 }
405 
406 
407 
408 
409 
410 
411 
412 
413 
414 
415 
416 
417 
418 
428 CURL *init(char *error_buffer)
429 {
430 
431  CURL *curl = curl_easy_init();
432  if (!curl)
433  throw libdap::InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
434 
435  // Load in the default headers to send with a request. The empty Pragma
436  // headers overrides libcurl's default Pragma: no-cache header (which
437  // will disable caching by Squid, etc.).
438 
439  // the empty Pragma never appears in the outgoing headers when this isn't present
440  // d_request_headers->push_back(string("Pragma: no-cache"));
441 
442  // d_request_headers->push_back(string("Cache-Control: no-cache"));
443 
444  // Allow compressed responses. Sending an empty string enables all supported compression types.
445 #ifndef CURLOPT_ACCEPT_ENCODING
446  curl_easy_setopt(curl, CURLOPT_ENCODING, "");
447 #else
448  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
449 #endif
450 
451  curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer);
452  // We have to set FailOnError to false for any of the non-Basic
453  // authentication schemes to work. 07/28/03 jhrg
454  curl_easy_setopt(curl, CURLOPT_FAILONERROR, 0);
455 
456  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
457  // choosing the the 'safest' one supported by the server.
458  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
459  curl_easy_setopt(curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
460 
461  curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1);
462  curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
463  curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
464  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
465  // param of save_raw_http_headers to a vector<string> object.
466 
467  // Follow 302 (redirect) responses
468  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
469  curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5);
470 
471 
472  // Set the user agent to curls version response because, well, that's what command line curl does :)
473  curl_easy_setopt(curl, CURLOPT_USERAGENT, curl_version());
474 
475 
476 #if 0
477  // If the user turns off SSL validation...
478  if (!d_rcr->get_validate_ssl() == 0) {
479  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
480  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
481  }
482 
483  // Look to see if cookies are turned on in the .dodsrc file. If so,
484  // activate here. We honor 'session cookies' (cookies without an
485  // expiration date) here so that session-base SSO systems will work as
486  // expected.
487  if (!d_cookie_jar.empty()) {
488  BESDEBUG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
489  curl_easy_setopt(curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
490  curl_easy_setopt(curl, CURLOPT_COOKIESESSION, 1);
491  }
492 #endif
493 
494 
495  if (curl_trace) {
496  BESDEBUG("curl", "curl_utils::www_lib_init() - Curl version: " << curl_version() << endl);
497  curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
498  BESDEBUG("curl", "curl_utils::www_lib_init() - Curl in verbose mode."<< endl);
499  curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, curl_debug);
500  BESDEBUG("curl", "curl_utils::www_lib_init() - Curl debugging function installed."<< endl);
501  }
502 
503 
504  BESDEBUG("curl", "curl_utils::www_lib_init() - curl: " << curl << endl);
505 
506  return curl;
507 
508 
509 }
510 
511 
512 
513 
529 long read_url(CURL *curl,
530  const string &url,
531  int fd,
532  vector<string> *resp_hdrs,
533  const vector<string> *request_headers,
534  char error_buffer[])
535 {
536 
537  BESDEBUG("curl", "curl_utils::read_url() - BEGIN" << endl);
538 
539 
540  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
541 
542  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToOpenfileDescriptor);
543 
544 
545 #ifdef CURLOPT_WRITEDATA
546  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &fd);
547 #else
548  curl_easy_setopt(curl, CURLOPT_FILE, &fd);
549 #endif
550 
551 
552 
553  //DBG(copy(d_request_headers.begin(), d_request_headers.end(), ostream_iterator<string>(cerr, "\n")));
554 
555  BuildHeaders req_hdrs;
556  //req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
557  // req_hdrs);
558  if (request_headers)
559  req_hdrs = for_each(request_headers->begin(), request_headers->end(), req_hdrs);
560  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
561 
562 
563  // Pass save_raw_http_headers() a pointer to the vector<string> where the
564  // response headers may be stored. Callers can use the resp_hdrs
565  // value/result parameter to get the raw response header information .
566  curl_easy_setopt(curl, CURLOPT_WRITEHEADER, resp_hdrs);
567 
568  // This call is the one that makes curl go get the thing.
569  CURLcode res = curl_easy_perform(curl);
570 
571  // Free the header list and null the value in d_curl.
572  curl_slist_free_all(req_hdrs.get_headers());
573  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, 0);
574 
575 
576  if (res != 0){
577  BESDEBUG("curl", "curl_utils::read_url() - OUCH! CURL returned an error! curl msg: " << curl_easy_strerror(res) << endl);
578  BESDEBUG("curl", "curl_utils::read_url() - OUCH! CURL returned an error! error_buffer: " << error_buffer << endl);
579  throw libdap::Error(error_buffer);
580  }
581 
582  long status;
583  res = curl_easy_getinfo(curl, CURLINFO_HTTP_CODE, &status);
584  BESDEBUG("curl", "curl_utils::read_url() - HTTP Status " << status << endl);
585  if (res != CURLE_OK)
586  throw libdap::Error(error_buffer);
587  BESDEBUG("curl", "curl_utils::read_url() - END" << endl);
588 
589  return status;
590 }
591 
592 
593 
594 } /* namespace gateway */