bes  Updated for version 3.20.6
httpd_catalog_module/curl_utils.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 // This file is part of BES httpd_catalog_module, A C++ module that can be loaded in to
4 // the OPeNDAP Back-End Server (BES) and is able to handle remote requests.
5 
6 // Copyright (c) 2018 OPeNDAP, Inc.
7 // Author: Nathan Potter <ndp@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include <unistd.h>
26 #include <algorithm> // std::for_each
27 #include <sstream>
28 
29 #include <GNURegex.h>
30 
31 #include <util.h>
32 
33 #include "BESDebug.h"
34 #include "BESSyntaxUserError.h"
35 #include "BESInternalError.h"
36 #include "BESInternalFatalError.h"
37 #include "WhiteList.h"
38 
39 #include "curl_utils.h"
40 #include "HttpdCatalogUtils.h"
41 #include "HttpdCatalogNames.h"
42 
43 #define prolog string("curl_utils.cc: ").append(__func__).append("() - ")
44 
45 using namespace std;
46 
47 namespace httpd_catalog {
48 
// Debugging switch. When non-zero, init() enables libcurl's verbose mode
// (CURLOPT_VERBOSE) on the handle it creates and installs curl_debug() as the
// debug callback. Defaults to 0 (off).
int curl_trace = 0;
51 
// Inclusive range of HTTP client-error status codes that have a message in
// http_client_errors. The table is indexed by (status - CLIENT_ERR_MIN).
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
    /* 400 */ "Bad Request:",
    /* 401 */ "Unauthorized: Contact the server administrator.",
    /* 402 */ "Payment Required.",
    /* 403 */ "Forbidden: Contact the server administrator.",
    /* 404 */ "Not Found: The data source or server could not be found.\n"
              "Often this means that the OPeNDAP server is missing or needs attention.\n"
              "Please contact the server administrator.",
    /* 405 */ "Method Not Allowed.",
    /* 406 */ "Not Acceptable.",
    /* 407 */ "Proxy Authentication Required.",
    /* 408 */ "Request Time-out.",
    /* 409 */ "Conflict.",
    /* 410 */ "Gone.",
    /* 411 */ "Length Required.",
    /* 412 */ "Precondition Failed.",
    /* 413 */ "Request Entity Too Large.",
    /* 414 */ "Request URI Too Large.",
    /* 415 */ "Unsupported Media Type.",
    /* 416 */ "Requested Range Not Satisfiable.",
    /* 417 */ "Expectation Failed."
};

// Inclusive range of HTTP server-error status codes that have a message in
// http_server_errors. The table is indexed by (status - SERVER_ERR_MIN).
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
    /* 500 */ "Internal Server Error.",
    /* 501 */ "Not Implemented.",
    /* 502 */ "Bad Gateway.",
    /* 503 */ "Service Unavailable.",
    /* 504 */ "Gateway Time-out.",
    /* 505 */ "HTTP Version Not Supported."
};

/**
 * @brief Translate an HTTP status code into a human readable message.
 *
 * Codes in [CLIENT_ERR_MIN, CLIENT_ERR_MAX] and [SERVER_ERR_MIN, SERVER_ERR_MAX]
 * are looked up in the tables above; every other value (including success and
 * redirect codes) yields the generic "Unknown Error" text.
 *
 * @param status The HTTP status code.
 * @return The message associated with the status code.
 */
std::string http_status_to_string(int status)
{
    const bool in_client_range = (CLIENT_ERR_MIN <= status) && (status <= CLIENT_ERR_MAX);
    if (in_client_range) {
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);
    }

    const bool in_server_range = (SERVER_ERR_MIN <= status) && (status <= SERVER_ERR_MAX);
    if (in_server_range) {
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);
    }

    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
93 
94 static string get_curl_auth_type_name(const int authType)
95 {
96  string authTypeString;
97  int match;
98 
99  match = authType & CURLAUTH_BASIC;
100  if (match) {
101  authTypeString += "CURLAUTH_BASIC";
102  }
103 
104  match = authType & CURLAUTH_DIGEST;
105  if (match) {
106  if (!authTypeString.empty()) authTypeString += " ";
107  authTypeString += "CURLAUTH_DIGEST";
108  }
109 
110  match = authType & CURLAUTH_DIGEST_IE;
111  if (match) {
112  if (!authTypeString.empty()) authTypeString += " ";
113  authTypeString += "CURLAUTH_DIGEST_IE";
114  }
115 
116  match = authType & CURLAUTH_GSSNEGOTIATE;
117  if (match) {
118  if (!authTypeString.empty()) authTypeString += " ";
119  authTypeString += "CURLAUTH_GSSNEGOTIATE";
120  }
121 
122  match = authType & CURLAUTH_NTLM;
123  if (match) {
124  if (!authTypeString.empty()) authTypeString += " ";
125  authTypeString += "CURLAUTH_NTLM";
126  }
127 
128 #if 0
129  match = authType & CURLAUTH_ANY;
130  if(match) {
131  if(!authTypeString.empty())
132  authTypeString += " ";
133  authTypeString += "CURLAUTH_ANY";
134  }
135 
136  match = authType & CURLAUTH_ANY;
137  if(match) {
138  if(!authTypeString.empty())
139  authTypeString += " ";
140  authTypeString += "CURLAUTH_ANYSAFE";
141  }
142 
143  match = authType & CURLAUTH_ANY;
144  if(match) {
145  if(!authTypeString.empty())
146  authTypeString += " ";
147  authTypeString += "CURLAUTH_ONLY";
148  }
149 #endif
150 
151  return authTypeString;
152 }
153 
158 static size_t writeToOpenfileDescriptor(char *data, size_t /* size */, size_t nmemb, void *userdata)
159 {
160  int *fd = (int *) userdata;
161 
162  BESDEBUG(MODULE, prolog << "Bytes received " << libdap::long_to_string(nmemb) << endl);
163  int wrote = write(*fd, data, nmemb);
164  BESDEBUG(MODULE, prolog << "Bytes written " << libdap::long_to_string(wrote) << endl);
165 
166  return wrote;
167 }
168 
192 static size_t save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
193 {
194  BESDEBUG(MODULE, prolog << "Inside the header parser." << endl);
195  vector<string> *hdrs = static_cast<vector<string> *>(resp_hdrs);
196 
197  // Grab the header, minus the trailing newline. Or \r\n pair.
198  string complete_line;
199  if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r')
200  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2));
201  else
202  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1));
203 
204  // Store all non-empty headers that are not HTTP status codes
205  if (complete_line != "" && complete_line.find("HTTP") == string::npos) {
206  BESDEBUG(MODULE, prolog << "Header line: " << complete_line << endl);
207  hdrs->push_back(complete_line);
208  }
209 
210  return size * nmemb;
211 }
212 
214 static int curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
215 {
216  string message(msg, size);
217 
218  switch (info) {
219  case CURLINFO_TEXT:
220  BESDEBUG(MODULE, prolog << "Text: " << message << endl);
221  break;
222  case CURLINFO_HEADER_IN:
223  BESDEBUG(MODULE, prolog << "Header in: " << message << endl);
224  break;
225  case CURLINFO_HEADER_OUT:
226  BESDEBUG(MODULE, prolog << "Header out: " << endl << message << endl);
227  break;
228  case CURLINFO_DATA_IN:
229  BESDEBUG(MODULE, prolog << "Data in: " << message << endl);
230  break;
231  case CURLINFO_DATA_OUT:
232  BESDEBUG(MODULE, prolog << "Data out: " << message << endl);
233  break;
234  case CURLINFO_END:
235  BESDEBUG(MODULE, prolog << "End: " << message << endl);
236  break;
237 #ifdef CURLINFO_SSL_DATA_IN
238  case CURLINFO_SSL_DATA_IN:
239  BESDEBUG(MODULE, prolog << "SSL Data in: " << message << endl ); break;
240 #endif
241 #ifdef CURLINFO_SSL_DATA_OUT
242  case CURLINFO_SSL_DATA_OUT:
243  BESDEBUG(MODULE, prolog << "SSL Data out: " << message << endl ); break;
244 #endif
245  default:
246  BESDEBUG(MODULE, prolog << "Curl info: " << message << endl);
247  break;
248  }
249 
250  return 0;
251 }
252 
255 class BuildHeaders: public std::unary_function<const string &, void> {
256  struct curl_slist *d_cl;
257 
258 public:
259  BuildHeaders() :
260  d_cl(0)
261  {
262  }
263 
264  void operator()(const string &header)
265  {
266  BESDEBUG(MODULE, "BuildHeaders::operator() - Adding '" << header.c_str() << "' to the header list." << endl);
267  d_cl = curl_slist_append(d_cl, header.c_str());
268  }
269 
270  struct curl_slist *get_headers()
271  {
272  return d_cl;
273  }
274 };
275 
289 bool configureProxy(CURL *curl, const string &url)
290 {
291  BESDEBUG(MODULE, prolog << " BEGIN." << endl);
292 
293  bool using_proxy = false;
294 
295  string proxyHost = HttpdCatalogUtils::ProxyHost;
296  int proxyPort = HttpdCatalogUtils::ProxyPort;
297  string proxyPassword = HttpdCatalogUtils::ProxyPassword;
298  string proxyUser = HttpdCatalogUtils::ProxyUser;
299  string proxyUserPW = HttpdCatalogUtils::ProxyUserPW;
300  int proxyAuthType = HttpdCatalogUtils::ProxyAuthType;
301 
302  if (!proxyHost.empty()) {
303  using_proxy = true;
304  if (proxyPort == 0) proxyPort = 8080;
305  }
306 
307  if (using_proxy) {
308  BESDEBUG(MODULE, prolog << "Found proxy configuration." << endl);
309 
310  // Don't set up the proxy server for URLs that match the 'NoProxy'
311  // regex set in the gateway.conf file.
312 
313  // Don't create the regex if the string is empty
314  if (!HttpdCatalogUtils::NoProxyRegex.empty()) {
315  BESDEBUG(MODULE, prolog << "Found NoProxyRegex." << endl);
316  libdap::Regex r(HttpdCatalogUtils::NoProxyRegex.c_str());
317  if (r.match(url.c_str(), url.length()) != -1) {
318  BESDEBUG(MODULE, prolog << "Found NoProxy match. Regex: " << HttpdCatalogUtils::NoProxyRegex << "; Url: " << url << endl);
319  using_proxy = false;
320  }
321  }
322 
323  if (using_proxy) {
324 
325  BESDEBUG(MODULE, prolog << "Setting up a proxy server." << endl);
326  BESDEBUG(MODULE, prolog << "Proxy host: " << proxyHost << endl);
327  BESDEBUG(MODULE, prolog << "Proxy port: " << proxyPort << endl);
328 
329  curl_easy_setopt(curl, CURLOPT_PROXY, proxyHost.data());
330  curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxyPort);
331 
332  // According to http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTPROXYAUTH As of 4/21/08 only NTLM, Digest and Basic work.
333 
334 #if 0
335  BESDEBUG(MODULE, prolog << "CURLOPT_PROXYAUTH = " << CURLOPT_PROXYAUTH << endl);
336  BESDEBUG(MODULE, prolog << "CURLAUTH_BASIC = " << CURLAUTH_BASIC << endl);
337  BESDEBUG(MODULE, prolog << "CURLAUTH_DIGEST = " << CURLAUTH_DIGEST << endl);
338  BESDEBUG(MODULE, prolog << "CURLAUTH_DIGEST_IE = " << CURLAUTH_DIGEST_IE << endl);
339  BESDEBUG(MODULE, prolog << "CURLAUTH_GSSNEGOTIATE = " << CURLAUTH_GSSNEGOTIATE << endl);
340  BESDEBUG(MODULE, prolog << "CURLAUTH_NTLM = " << CURLAUTH_NTLM << endl);
341  BESDEBUG(MODULE, prolog << "CURLAUTH_ANY = " << CURLAUTH_ANY << endl);
342  BESDEBUG(MODULE, prolog << "CURLAUTH_ANYSAFE = " << CURLAUTH_ANYSAFE << endl);
343  BESDEBUG(MODULE, prolog << "CURLAUTH_ONLY = " << CURLAUTH_ONLY << endl);
344  BESDEBUG(MODULE, prolog << "Using CURLOPT_PROXYAUTH = " << proxyAuthType << endl);
345 #endif
346 
347  BESDEBUG(MODULE, prolog << "Using CURLOPT_PROXYAUTH = " << get_curl_auth_type_name(proxyAuthType) << endl);
348  curl_easy_setopt(curl, CURLOPT_PROXYAUTH, proxyAuthType);
349 
350  if (!proxyUser.empty()) {
351  curl_easy_setopt(curl, CURLOPT_PROXYUSERNAME, proxyUser.data());
352  BESDEBUG(MODULE, prolog << "CURLOPT_PROXYUSER : " << proxyUser << endl);
353 
354  if (!proxyPassword.empty()) {
355  curl_easy_setopt(curl, CURLOPT_PROXYPASSWORD, proxyPassword.data());
356  BESDEBUG(MODULE, prolog << "CURLOPT_PROXYPASSWORD: " << proxyPassword << endl);
357  }
358  }
359  else if (!proxyUserPW.empty()) {
360  BESDEBUG(MODULE, prolog << "CURLOPT_PROXYUSERPWD : " << proxyUserPW << endl);
361  curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, proxyUserPW.data());
362  }
363  }
364  }
365 
366  BESDEBUG(MODULE, prolog << "END." << endl);
367 
368  return using_proxy;
369 }
370 
380 CURL *init(char *error_buffer)
381 {
382  CURL *curl = curl_easy_init();
383  if (!curl) throw BESInternalFatalError("Could not initialize libcurl.", __FILE__, __LINE__);
384 
385  // Load in the default headers to send with a request. The empty Pragma
386  // headers overrides libcurl's default Pragma: no-cache header (which
387  // will disable caching by Squid, etc.).
388 
389  // the empty Pragma never appears in the outgoing headers when this isn't present
390  // d_request_headers->push_back(string("Pragma: no-cache"));
391  // d_request_headers->push_back(string("Cache-Control: no-cache"));
392 
393  // Allow compressed responses. Sending an empty string enables all supported compression types.
394 #ifndef CURLOPT_ACCEPT_ENCODING
395  curl_easy_setopt(curl, CURLOPT_ENCODING, "");
396 #else
397  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
398 #endif
399 
400  curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, error_buffer);
401  // We have to set FailOnError to false for any of the non-Basic
402  // authentication schemes to work. 07/28/03 jhrg
403  curl_easy_setopt(curl, CURLOPT_FAILONERROR, 0);
404 
405  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
406  // choosing the the 'safest' one supported by the server.
407  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
408  curl_easy_setopt(curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
409 
410  // I added these next three to support Hyrax accessing data held behind URS auth. ndp - 8/20/18
411  curl_easy_setopt(curl, CURLOPT_NETRC, 1);
412 
413  // #TODO #FIXME Make these file names configuration based.
414  curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "/tmp/.hyrax_cookies");
415  curl_easy_setopt(curl, CURLOPT_COOKIEJAR, "/tmp/.hyrax_cookies");
416 
417  curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1);
418  curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
419  curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
420  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
421  // param of save_raw_http_headers to a vector<string> object.
422 
423  // Follow 302 (redirect) responses
424  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
425  curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 5);
426 
427  // Set the user agent to curls version response because, well, that's what command line curl does :)
428  curl_easy_setopt(curl, CURLOPT_USERAGENT, curl_version());
429 
430 #if 0
431  // If the user turns off SSL validation...
432  if (!d_rcr->get_validate_ssl() == 0) {
433  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0);
434  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0);
435  }
436 
437  // Look to see if cookies are turned on in the .dodsrc file. If so,
438  // activate here. We honor 'session cookies' (cookies without an
439  // expiration date) here so that session-base SSO systems will work as
440  // expected.
441  if (!d_cookie_jar.empty()) {
442  BESDEBUG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
443  curl_easy_setopt(curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
444  curl_easy_setopt(curl, CURLOPT_COOKIESESSION, 1);
445  }
446 #endif
447 
448  if (curl_trace) {
449  BESDEBUG(MODULE, prolog << "Curl version: " << curl_version() << endl);
450  curl_easy_setopt(curl, CURLOPT_VERBOSE, 1);
451  BESDEBUG(MODULE, prolog << "Curl in verbose mode."<< endl);
452  curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, curl_debug);
453  BESDEBUG(MODULE, prolog << "Curl debugging function installed."<< endl);
454  }
455 
456  BESDEBUG(MODULE, prolog << "curl: " << curl << endl);
457 
458  return curl;
459 }
460 
476 long read_url(CURL *curl, const string &url, int fd, vector<string> *resp_hdrs, const vector<string> *request_headers, char error_buffer[])
477 {
478  BESDEBUG(MODULE, prolog << "BEGIN" << endl);
479  BESDEBUG(MODULE, prolog << "url: " << url << endl);
480 
481  // Before we do anything, make sure that the URL is OK to pursue.
482  if (!bes::WhiteList::get_white_list()->is_white_listed(url)) {
483  string err = string("The specified URL ") + url + " does not match any of the accessible services in the white list.";
484  BESDEBUG(MODULE, prolog << err << endl);
485  throw BESSyntaxUserError(err, __FILE__, __LINE__);
486  }
487 
488  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
489  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeToOpenfileDescriptor);
490 
491 #ifdef CURLOPT_WRITEDATA
492  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &fd);
493 #else
494  curl_easy_setopt(curl, CURLOPT_FILE, &fd);
495 #endif
496 
497  BuildHeaders req_hdrs;
498  if (request_headers) req_hdrs = for_each(request_headers->begin(), request_headers->end(), req_hdrs);
499  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
500 
501  // Pass save_raw_http_headers() a pointer to the vector<string> where the
502  // response headers may be stored. Callers can use the resp_hdrs
503  // value/result parameter to get the raw response header information.
504  curl_easy_setopt(curl, CURLOPT_WRITEHEADER, resp_hdrs);
505 
506 
507  char *urlp = NULL;
508  curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &urlp);
509  BESDEBUG(MODULE, prolog << "url in curl object: " << urlp << endl);
510 
511  // This call is the one that makes curl go get the thing.
512  CURLcode res = curl_easy_perform(curl);
513 
514  if (res != CURLE_OK) {
515  BESDEBUG(MODULE, prolog << "OUCH! CURL returned an error! curl msg: " << curl_easy_strerror(res) << endl);
516  throw BESInternalError(string("CURL returned an error! curl msg: ").append(curl_easy_strerror(res)), __FILE__, __LINE__);
517  }
518 
519  // Free the header list and null the value in d_curl.
520  curl_slist_free_all(req_hdrs.get_headers());
521  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, 0);
522 
523  long status;
524  res = curl_easy_getinfo(curl, CURLINFO_HTTP_CODE, &status);
525  BESDEBUG(MODULE, prolog << "HTTP Status " << status << endl);
526 
527  if (res != CURLE_OK) {
528  ostringstream oss;
529  oss << "HTTP Status: " << status;
530  throw BESInternalError(oss.str().append("; ").append(curl_easy_strerror(res)), __FILE__, __LINE__);
531  }
532 
533  BESDEBUG(MODULE, prolog << "END" << endl);
534 
535  return status;
536 }
537 
538 } /* namespace httpd_catalog */
BESInternalFatalError
exception thrown if an internal error is found and is fatal to the BES
Definition: BESInternalFatalError.h:43
BESSyntaxUserError
error thrown if there is a user syntax error in the request or any other user error
Definition: BESSyntaxUserError.h:41
bes::WhiteList::get_white_list
static WhiteList * get_white_list()
Static accessor for the singleton.
Definition: WhiteList.cc:55
BESInternalError
exception thrown if internal error encountered
Definition: BESInternalError.h:43