bes  Updated for version 3.20.6
cmr_module/curl_utils.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of cmr_module, A C++ module that can be loaded in to
4 // the OPeNDAP Back-End Server (BES) and is able to handle remote requests.
5 
6 // Copyright (c) 2013 OPeNDAP, Inc.
7 // Author: Nathan Potter <ndp@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
#include <unistd.h>

#include <algorithm> // std::for_each
#include <cerrno>    // errno, EINTR
#include <cstring>   // std::strerror
#include <string>
#include <vector>

#include <GNURegex.h>

#include "util.h"
#include "BESDebug.h"
#include "BESSyntaxUserError.h"
#include "CmrUtils.h"
#include "WhiteList.h"

#include "curl_utils.h"
37 
38 #define MODULE "cmr"
39 
40 using namespace std;
41 
42 namespace cmr {
43 
// Debugging switch: when non-zero, init() turns on libcurl's verbose mode and
// installs the curl_debug() callback on the handle it creates. Keep at 0 for
// normal operation.
int curl_trace = 0;
46 
// Inclusive range of HTTP client-error (4xx) status codes that have an entry
// in http_client_errors.
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417

// Message text for each HTTP client-error status. The entry for status S is
// stored at index (S - CLIENT_ERR_MIN); the explicit array bound keeps the
// table and the macros above in step.
const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
    "Bad Request:",                                     // 400
    "Unauthorized: Contact the server administrator.",  // 401
    "Payment Required.",                                // 402
    "Forbidden: Contact the server administrator.",     // 403
    "Not Found: The data source or server could not be found.\n"
    "Often this means that the OPeNDAP server is missing or needs attention.\n"
    "Please contact the server administrator.",         // 404
    "Method Not Allowed.",                              // 405
    "Not Acceptable.",                                  // 406
    "Proxy Authentication Required.",                   // 407
    "Request Time-out.",                                // 408
    "Conflict.",                                        // 409
    "Gone:.",                                           // 410
    "Length Required.",                                 // 411
    "Precondition Failed.",                             // 412
    "Request Entity Too Large.",                        // 413
    "Request URI Too Large.",                           // 414
    "Unsupported Media Type.",                          // 415
    "Requested Range Not Satisfiable.",                 // 416
    "Expectation Failed."                               // 417
};
72 
// Inclusive range of HTTP server-error (5xx) status codes that have an entry
// in http_server_errors.
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505

// Message text for each HTTP server-error status. The entry for status S is
// stored at index (S - SERVER_ERR_MIN).
const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
    "Internal Server Error.",       // 500
    "Not Implemented.",             // 501
    "Bad Gateway.",                 // 502
    "Service Unavailable.",         // 503
    "Gateway Time-out.",            // 504
    "HTTP Version Not Supported."   // 505
};
84 
85 
88 string http_status_to_string(int status)
89 {
90  if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
91  return string(http_client_errors[status - CLIENT_ERR_MIN]);
92  else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
93  return string(http_server_errors[status - SERVER_ERR_MIN]);
94  else
95  return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
96 }
97 
98 static string getCurlAuthTypeName(const int authType){
99 
100  string authTypeString;
101  int match;
102 
103  match = authType & CURLAUTH_BASIC;
104  if(match){
105  authTypeString += "CURLAUTH_BASIC";
106  }
107 
108  match = authType & CURLAUTH_DIGEST;
109  if(match){
110  if(!authTypeString.empty())
111  authTypeString += " ";
112  authTypeString += "CURLAUTH_DIGEST";
113  }
114 
115  match = authType & CURLAUTH_DIGEST_IE;
116  if(match){
117  if(!authTypeString.empty())
118  authTypeString += " ";
119  authTypeString += "CURLAUTH_DIGEST_IE";
120  }
121 
122  match = authType & CURLAUTH_GSSNEGOTIATE;
123  if(match){
124  if(!authTypeString.empty())
125  authTypeString += " ";
126  authTypeString += "CURLAUTH_GSSNEGOTIATE";
127  }
128 
129  match = authType & CURLAUTH_NTLM;
130  if(match){
131  if(!authTypeString.empty())
132  authTypeString += " ";
133  authTypeString += "CURLAUTH_NTLM";
134  }
135 
136 #if 0
137  match = authType & CURLAUTH_ANY;
138  if(match){
139  if(!authTypeString.empty())
140  authTypeString += " ";
141  authTypeString += "CURLAUTH_ANY";
142  }
143 
144 
145  match = authType & CURLAUTH_ANY;
146  if(match){
147  if(!authTypeString.empty())
148  authTypeString += " ";
149  authTypeString += "CURLAUTH_ANYSAFE";
150  }
151 
152 
153  match = authType & CURLAUTH_ANY;
154  if(match){
155  if(!authTypeString.empty())
156  authTypeString += " ";
157  authTypeString += "CURLAUTH_ONLY";
158  }
159 #endif
160 
161  return authTypeString;
162 }
163 
164 
169 static size_t writeToOpenfileDescriptor( char *data, size_t /* size */, size_t nmemb, void *userdata){
170 
171  int *fd = (int *) userdata;
172 
173  BESDEBUG(MODULE, "curl_utils::writeToOpenfileDescriptor() - Bytes received " << libdap::long_to_string(nmemb) << endl);
174  int wrote = write(*fd, data, nmemb);
175  BESDEBUG(MODULE, "curl_utils::writeToOpenfileDescriptor() - Bytes written " << libdap::long_to_string(wrote) << endl);
176 
177  return wrote;
178 }
179 
180 
204 static size_t save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
205 {
206  BESDEBUG(MODULE, "curl_utils::save_raw_http_headers() - Inside the header parser." << endl);
207  vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);
208 
209  // Grab the header, minus the trailing newline. Or \r\n pair.
210  string complete_line;
211  if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r')
212  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2));
213  else
214  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1));
215 
216  // Store all non-empty headers that are not HTTP status codes
217  if (complete_line != "" && complete_line.find("HTTP") == string::npos) {
218  BESDEBUG(MODULE, "curl_utils::save_raw_http_headers() - Header line: " << complete_line << endl);
219  hdrs->push_back(complete_line);
220  }
221 
222  return size * nmemb;
223 }
224 
225 
226 
227 
228 
230 static int curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
231 {
232  string message(msg, size);
233 
234  switch (info) {
235  case CURLINFO_TEXT:
236  BESDEBUG(MODULE, "curl_utils::curl_debug() - Text: " << message << endl ); break;
237  case CURLINFO_HEADER_IN:
238  BESDEBUG(MODULE, "curl_utils::curl_debug() - Header in: " << message << endl ); break;
239  case CURLINFO_HEADER_OUT:
240  BESDEBUG(MODULE, "curl_utils::curl_debug() - Header out: " << endl << message << endl ); break;
241  case CURLINFO_DATA_IN:
242  BESDEBUG(MODULE, "curl_utils::curl_debug() - Data in: " << message << endl ); break;
243  case CURLINFO_DATA_OUT:
244  BESDEBUG(MODULE, "curl_utils::curl_debug() - Data out: " << message << endl ); break;
245  case CURLINFO_END:
246  BESDEBUG(MODULE, "curl_utils::curl_debug() - End: " << message << endl ); break;
247 #ifdef CURLINFO_SSL_DATA_IN
248  case CURLINFO_SSL_DATA_IN:
249  BESDEBUG(MODULE, "curl_utils::curl_debug() - SSL Data in: " << message << endl ); break;
250 #endif
251 #ifdef CURLINFO_SSL_DATA_OUT
252  case CURLINFO_SSL_DATA_OUT:
253  BESDEBUG(MODULE, "curl_utils::curl_debug() - SSL Data out: " << message << endl ); break;
254 #endif
255  default:
256  BESDEBUG(MODULE, "curl_utils::curl_debug() - Curl info: " << message << endl ); break;
257  }
258  return 0;
259 }
260 
261 
262 
263 
264 
265 
266 
269 class BuildHeaders : public std::unary_function<const string &, void>
270 {
271  struct curl_slist *d_cl;
272 
273 public:
274  BuildHeaders() : d_cl(0)
275  {}
276 
277  void operator()(const string &header)
278  {
279  BESDEBUG(MODULE, "BuildHeaders::operator() - Adding '" << header.c_str() << "' to the header list." << endl);
280  d_cl = curl_slist_append(d_cl, header.c_str());
281  }
282 
283  struct curl_slist *get_headers()
284  {
285  return d_cl;
286  }
287 };
288 
289 
290 
291 
292 
293 
294 
308 bool configureProxy(CURL *curl, const string &url) {
309  BESDEBUG( MODULE, "curl_utils::configureProxy() - BEGIN." << endl);
310 
311  bool using_proxy = false;
312 
313  // I pulled this because I could never find where it was applied
314  // to the curl state in HTTPConnect
315  //string proxyProtocol = GatewayUtils::ProxyProtocol;
316 
317  string proxyHost = cmr::CmrUtils::ProxyHost;
318  int proxyPort = cmr::CmrUtils::ProxyPort;
319  string proxyPassword = cmr::CmrUtils::ProxyPassword;
320  string proxyUser = cmr::CmrUtils::ProxyUser;
321  string proxyUserPW = cmr::CmrUtils::ProxyUserPW;
322  int proxyAuthType = cmr::CmrUtils::ProxyAuthType;
323 
324  if (!proxyHost.empty()) {
325  using_proxy = true;
326  if(proxyPort==0)
327  proxyPort = 8080;
328 
329  // Apparently we don't need this...
330  //if(proxyProtocol.empty())
331  // proxyProtocol = "http";
332 
333  }
334  if (using_proxy) {
335  BESDEBUG( MODULE, "curl_utils::configureProxy() - Found proxy configuration." << endl);
336 
337  // Don't set up the proxy server for URLs that match the 'NoProxy'
338  // regex set in the gateway.conf file.
339 
340  // Don't create the regex if the string is empty
341  if (!CmrUtils::NoProxyRegex.empty()) {
342  BESDEBUG( MODULE, "curl_utils::configureProxy() - Found NoProxyRegex." << endl);
343  libdap::Regex r(CmrUtils::NoProxyRegex.c_str());
344  if (r.match(url.c_str(), url.length()) != -1) {
345  BESDEBUG( MODULE, "curl_utils::configureProxy() - Found NoProxy match. Regex: " << CmrUtils::NoProxyRegex << "; Url: " << url << endl);
346  using_proxy = false;
347  }
348  }
349 
350  if (using_proxy) {
351 
352  BESDEBUG(MODULE, "curl_utils::configureProxy() - Setting up a proxy server." << endl);
353  BESDEBUG(MODULE, "curl_utils::configureProxy() - Proxy host: " << proxyHost << endl);
354  BESDEBUG(MODULE, "curl_utils::configureProxy() - Proxy port: " << proxyPort << endl);
355 
356  curl_easy_setopt(curl, CURLOPT_PROXY, proxyHost.data());
357  curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxyPort);
358 
359 // #ifdef CURLOPT_PROXYAUTH
360 
361  // oddly "#ifdef CURLOPT_PROXYAUTH" doesn't work - even though CURLOPT_PROXYAUTH is defined and valued at 111 it
362  // fails the test. Eclipse hover over the CURLOPT_PROXYAUTH symbol shows: "CINIT(PROXYAUTH, LONG, 111)",
363  // for what that's worth
364 
365  // According to http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTPROXYAUTH As of 4/21/08 only NTLM, Digest and Basic work.
366 
367 #if 0
368  BESDEBUG(MODULE, "curl_utils::configureProxy() - CURLOPT_PROXYAUTH = " << CURLOPT_PROXYAUTH << endl);
369  BESDEBUG(MODULE, "curl_utils::configureProxy() - CURLAUTH_BASIC = " << CURLAUTH_BASIC << endl);
370  BESDEBUG(MODULE, "curl_utils::configureProxy() - CURLAUTH_DIGEST = " << CURLAUTH_DIGEST << endl);
371  BESDEBUG(MODULE, "curl_utils::configureProxy() - CURLAUTH_DIGEST_IE = " << CURLAUTH_DIGEST_IE << endl);
372  BESDEBUG(MODULE, "curl_utils::configureProxy() - CURLAUTH_GSSNEGOTIATE = " << CURLAUTH_GSSNEGOTIATE << endl);
373  BESDEBUG(MODULE, "curl_utils::configureProxy() - CURLAUTH_NTLM = " << CURLAUTH_NTLM << endl);
374  BESDEBUG(MODULE, "curl_utils::configureProxy() - CURLAUTH_ANY = " << CURLAUTH_ANY << endl);
375  BESDEBUG(MODULE, "curl_utils::configureProxy() - CURLAUTH_ANYSAFE = " << CURLAUTH_ANYSAFE << endl);
376  BESDEBUG(MODULE, "curl_utils::configureProxy() - CURLAUTH_ONLY = " << CURLAUTH_ONLY << endl);
377  BESDEBUG(MODULE, "curl_utils::configureProxy() - Using CURLOPT_PROXYAUTH = " << proxyAuthType << endl);
378 #endif
379 
380  BESDEBUG(MODULE, "curl_utils::configureProxy() - Using CURLOPT_PROXYAUTH = " << getCurlAuthTypeName(proxyAuthType) << endl);
381  curl_easy_setopt(curl, CURLOPT_PROXYAUTH, proxyAuthType);
382 // #endif
383 
384 
385 
386  if (!proxyUser.empty()){
387  curl_easy_setopt(curl, CURLOPT_PROXYUSERNAME, proxyUser.data());
388  BESDEBUG(MODULE, "curl_utils::configureProxy() - CURLOPT_PROXYUSER : " << proxyUser << endl);
389 
390  if (!proxyPassword.empty()){
391  curl_easy_setopt(curl, CURLOPT_PROXYPASSWORD, proxyPassword.data());
392  BESDEBUG(MODULE, "curl_utils::configureProxy() - CURLOPT_PROXYPASSWORD: " << proxyPassword << endl);
393  }
394  }
395  else if (!proxyUserPW.empty()){
396  BESDEBUG(MODULE,
397  "curl_utils::configureProxy() - CURLOPT_PROXYUSERPWD : " << proxyUserPW << endl);
398  curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, proxyUserPW.data());
399  }
400 
401  }
402  }
403  BESDEBUG( MODULE, "curl_utils::configureProxy() - END." << endl);
404 
405  return using_proxy;
406 }
407 
408 
409 
410 
411 
412 
413 
414 
415 
416 
417 
418 
419 
420 
430 CURL *init(char *error_buffer)
431 {
432 
433  CURL *curl = curl_easy_init();
434  if (!curl)
435  throw libdap::InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
436 
437  // Load in the default headers to send with a request. The empty Pragma
438  // headers overrides libcurl's default Pragma: no-cache header (which
439  // will disable caching by Squid, etc.).
440 
441  // the empty Pragma never appears in the outgoing headers when this isn't present
442  // d_request_headers->push_back(string("Pragma: no-cache"));
443 
444  // d_request_headers->push_back(string("Cache-Control: no-cache"));
445 
446  // Allow compressed responses. Sending an empty string enables all supported compression types.
447 #ifndef CURLOPT_ACCEPT_ENCODING
448  curl_easy_setopt(curl, CURLOPT_ENCODING, "");
449 #else
450  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
451 #endif
452 
453  curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer);
454  // We have to set FailOnError to false for any of the non-Basic
455  // authentication schemes to work. 07/28/03 jhrg
456  curl_easy_setopt(curl, CURLOPT_FAILONERROR, 0);
457 
458  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
459  // choosing the the 'safest' one supported by the server.
460  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
461  curl_easy_setopt(curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
462 
463  // I added these next three to support Hyrax accessing data held behind URS auth. ndp - 8/20/18
464  curl_easy_setopt(curl, CURLOPT_NETRC, 1);
465 
466  // #TODO #FIXME Make these file names configuration based.
467  curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "/tmp/.hyrax_cookies");
468  curl_easy_setopt(curl, CURLOPT_COOKIEJAR, "/tmp/.hyrax_cookies");
469 
470  curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1);
471  curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
472  curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
473  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
474  // param of save_raw_http_headers to a vector<string> object.
475 
476  // Follow 302 (redirect) responses
477  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
478  curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5);
479 
480 
481 
482 
483  // Set the user agent to curls version response because, well, that's what command line curl does :)
484  curl_easy_setopt(curl, CURLOPT_USERAGENT, curl_version());
485 
486 
487 #if 0
488  // If the user turns off SSL validation...
489  if (!d_rcr->get_validate_ssl() == 0) {
490  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
491  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
492  }
493 
494  // Look to see if cookies are turned on in the .dodsrc file. If so,
495  // activate here. We honor 'session cookies' (cookies without an
496  // expiration date) here so that session-base SSO systems will work as
497  // expected.
498  if (!d_cookie_jar.empty()) {
499  BESDEBUG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
500  curl_easy_setopt(curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
501  curl_easy_setopt(curl, CURLOPT_COOKIESESSION, 1);
502  }
503 #endif
504 
505 
506  if (curl_trace) {
507  BESDEBUG(MODULE, "curl_utils::www_lib_init() - Curl version: " << curl_version() << endl);
508  curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
509  BESDEBUG(MODULE, "curl_utils::www_lib_init() - Curl in verbose mode."<< endl);
510  curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, curl_debug);
511  BESDEBUG(MODULE, "curl_utils::www_lib_init() - Curl debugging function installed."<< endl);
512  }
513 
514 
515  BESDEBUG(MODULE, "curl_utils::www_lib_init() - curl: " << curl << endl);
516 
517  return curl;
518 
519 
520 }
521 
522 
523 
524 
540 long read_url(CURL *curl,
541  const string &url,
542  int fd,
543  vector<string> *resp_hdrs,
544  const vector<string> *request_headers,
545  char error_buffer[])
546 {
547  string prolog = string("curl_utils.cc ") + __func__ + "() - ";
548 
549  BESDEBUG(MODULE, prolog << "BEGIN" << endl);
550 
551  // Before we do anything, make sure that the URL is OK to pursue.
552  if (!bes::WhiteList::get_white_list()->is_white_listed(url)) {
553  string err = (string) "The specified URL " + url
554  + " does not match any of the accessible services in"
555  + " the white list.";
556  BESDEBUG(MODULE, prolog << err << endl);
557  throw BESSyntaxUserError(err, __FILE__, __LINE__);
558  }
559 
560  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
561  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToOpenfileDescriptor);
562 
563 #ifdef CURLOPT_WRITEDATA
564  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &fd);
565 #else
566  curl_easy_setopt(curl, CURLOPT_FILE, &fd);
567 #endif
568  //DBG(copy(d_request_headers.begin(), d_request_headers.end(), ostream_iterator<string>(cerr, "\n")));
569  BuildHeaders req_hdrs;
570  //req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(), req_hdrs);
571  if (request_headers)
572  req_hdrs = for_each(request_headers->begin(), request_headers->end(), req_hdrs);
573  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
574 
575  // Pass save_raw_http_headers() a pointer to the vector<string> where the
576  // response headers may be stored. Callers can use the resp_hdrs
577  // value/result parameter to get the raw response header information .
578  curl_easy_setopt(curl, CURLOPT_WRITEHEADER, resp_hdrs);
579 
580  // This call is the one that makes curl go get the thing.
581  CURLcode res = curl_easy_perform(curl);
582 
583  // Free the header list and null the value in d_curl.
584  curl_slist_free_all(req_hdrs.get_headers());
585  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, 0);
586 
587  if (res != 0){
588  BESDEBUG(MODULE, prolog << "OUCH! CURL returned an error! curl msg: " << curl_easy_strerror(res) << endl);
589  BESDEBUG(MODULE, prolog << "OUCH! CURL returned an error! error_buffer: " << error_buffer << endl);
590  throw libdap::Error(error_buffer);
591  }
592  long status;
593  res = curl_easy_getinfo(curl, CURLINFO_HTTP_CODE, &status);
594  BESDEBUG(MODULE, prolog << "HTTP Status " << status << endl);
595  if (res != CURLE_OK)
596  throw libdap::Error(error_buffer);
597  BESDEBUG(MODULE, prolog << "END" << endl);
598 
599  return status;
600 }
601 
602 
603 
604 } /* namespace cmr */
BESSyntaxUserError
error thrown if there is a user syntax error in the request or any other user error
Definition: BESSyntaxUserError.h:41
bes::WhiteList::get_white_list
static WhiteList * get_white_list()
Static accessor for the singleton.
Definition: WhiteList.cc:55