bes  Updated for version 3.20.6
CurlHandlePool.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of the BES
4 
5 // Copyright (c) 2018 OPeNDAP, Inc.
6 // Author: James Gallagher<jgallagher@opendap.org>
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23 
24 #include "config.h"
25 
26 #include <string>
27 #include <locale>
28 #include <sstream>
29 #include <iomanip>
30 
31 #include <cstring>
32 #include <unistd.h>
33 #include <ctime>
34 
35 #include <curl/curl.h>
36 
37 #if HAVE_CURL_MULTI_H
38 #include <curl/multi.h>
39 #endif
40 
41 #include <time.h>
42 
43 #include "util.h" // long_to_string()
44 
45 #include "BESLog.h"
46 #include "BESDebug.h"
47 #include "BESInternalError.h"
48 #include "BESForbiddenError.h"
49 #include <TheBESKeys.h>
50 #include "WhiteList.h"
51 
52 #include "DmrppRequestHandler.h"
53 #include "DmrppCommon.h"
54 #include "awsv4.h"
55 #include "CurlHandlePool.h"
56 #include "Chunk.h"
57 #include "CredentialsManager.h"
58 
59 #define KEEP_ALIVE 1 // Reuse libcurl easy handles (1) or not (0).
60 
61 #define CURL_VERBOSE 0 // Logs curl info to the bes.log
62 
63 static const int MAX_WAIT_MSECS = 30*1000; // Timeout handed to curl_multi_wait(), in milliseconds (30 seconds)
64 static const unsigned int retry_limit = 10; // Max. tries for one HTTP request before read_data() gives up (Amazon's suggestion)
65 static const unsigned int initial_retry_time = 1000; // First retry delay handed to usleep(), in microseconds (one milli-second); doubled after each failed try
66 
67 using namespace dmrpp;
68 using namespace std;
69 using namespace bes;
70 
71 #define MODULE "dmrpp:curl_handle_pool"
72 
73 Lock::Lock(pthread_mutex_t &lock) : m_mutex(lock)
74  {
75  int status = pthread_mutex_lock(&m_mutex);
76  if (status != 0) throw BESInternalError("Could not lock in CurlHandlePool", __FILE__, __LINE__);
77  }
78 
79 Lock::~Lock()
80  {
81  int status = pthread_mutex_unlock(&m_mutex);
82  if (status != 0)
83  ERROR("Could not unlock in CurlHandlePool");
84  }
85 
89 static string
90 curl_error_msg(CURLcode res, char *errbuf)
91 {
92  ostringstream oss;
93  size_t len = strlen(errbuf);
94  if (len) {
95  oss << errbuf;
96  oss << " (code: " << (int)res << ")";
97  }
98  else {
99  oss << curl_easy_strerror(res) << "(result: " << res << ")";
100  }
101 
102  return oss.str();
103 }
104 
110 #if 0
/**
 * @brief Build a hex dump of a block of bytes (debugging aid, normally compiled out).
 *
 * The layout follows the fprintf() formats of the C routine this was ported
 * from: a header line "<text>, <10-digit size> bytes (0x<8-digit hex size>)",
 * then one line per 16 bytes holding a 4-digit hex offset, the bytes as
 * two-digit hex values and the same bytes as characters (bytes outside
 * 0x20..0x7f are shown as '.').
 *
 * @param text Label for the header line; a null pointer is treated as "".
 * @param ptr The bytes to dump; not dereferenced when size is zero.
 * @param size Number of bytes at ptr.
 * @return The formatted dump.
 */
static
std::string dump(const char *text, unsigned char *ptr, size_t size)
{
    const size_t width = 0x10;

    std::ostringstream oss;
    oss << std::setfill('0');

    // "%s, %10.10ld bytes (0x%8.8lx)\n"
    oss << (text ? text : "") << ", " << std::dec << std::setw(10) << size
        << " bytes (0x" << std::hex << std::setw(8) << size << ")\n";

    for (size_t i = 0; i < size; i += width) {
        // "%4.4lx: " - the stream is still in hex mode from the header line
        oss << std::setw(4) << i << ": ";

        /* show hex to the left */
        for (size_t c = 0; c < width; ++c) {
            if (i + c < size) {
                // Convert to an integer first; streaming an unsigned char writes the raw character.
                oss << std::setw(2) << static_cast<unsigned int>(ptr[i + c]) << ' ';
            }
            else {
                oss << "   "; // keep the character column aligned on a short last row
            }
        }

        /* show data on the right */
        for (size_t c = 0; c < width && i + c < size; ++c) {
            const unsigned char byte = ptr[i + c];
            oss << static_cast<char>((byte >= 0x20 && byte < 0x80) ? byte : '.');
        }

        oss << '\n';
    }

    return oss.str();
}
150 #endif
151 
152 #if CURL_VERBOSE
153 
158 static
159 int curl_trace(CURL */*handle*/, curl_infotype type, char *data, size_t /*size*/, void */*userp*/)
160 {
161  string text = "";
162  switch (type) {
163  // print info
164  case CURLINFO_TEXT:
165  case CURLINFO_HEADER_OUT:
166  case CURLINFO_HEADER_IN: {
167  text = data;
168  size_t pos;
169  while ((pos = text.find('\n')) != string::npos)
170  text = text.substr(0, pos);
171  break;
172  }
173 
174  // Do not build up 'text' for the data transfers
175  case CURLINFO_DATA_OUT:
176  case CURLINFO_SSL_DATA_OUT:
177  case CURLINFO_DATA_IN:
178  case CURLINFO_SSL_DATA_IN:
179  default: /* in case a new one is introduced to shock us */
180  break;
181  }
182 
183  switch (type) {
184  // print info
185  case CURLINFO_TEXT:
186  LOG("libcurl == Info: " << text << endl);
187  break;
188 
189  case CURLINFO_HEADER_OUT:
190  LOG("libcurl == Send header: " << text << endl);
191  break;
192  case CURLINFO_HEADER_IN:
193  LOG("libcurl == Recv header: " << text << endl);
194  break;
195 #if 0
196  // Only print these if we're desperate and the above code has been hacked to match
197  case CURLINFO_DATA_OUT:
198  LOG("libcurl == Send data" << text << endl);
199  break;
200  case CURLINFO_SSL_DATA_OUT:
201  LOG("libcurl == Send SSL data" << text << endl);
202  break;
203  case CURLINFO_DATA_IN:
204  LOG("libcurl == Recv data" << text << endl);
205  break;
206  case CURLINFO_SSL_DATA_IN:
207  LOG("libcurl == Recv SSL data" << text << endl);
208  break;
209 #endif
210  default:
211  break;
212  }
213 
214  return 0;
215 }
216 #endif
217 
219 {
220  d_handle = curl_easy_init();
221  if (!d_handle) throw BESInternalError("Could not allocate CURL handle", __FILE__, __LINE__);
222 
223  CURLcode res;
224 
225  if (CURLE_OK != (res = curl_easy_setopt(d_handle, CURLOPT_ERRORBUFFER, d_errbuf)))
226  throw BESInternalError(string("CURL Error: ").append(curl_easy_strerror(res)), __FILE__, __LINE__);
227 
228 #if CURL_VERBOSE
229  if (CURLE_OK != (res = curl_easy_setopt(d_handle, CURLOPT_DEBUGFUNCTION, curl_trace)))
230  throw BESInternalError(string("CURL Error: ").append(curl_error_msg(res, d_errbuf)), __FILE__, __LINE__);
231  // Many tests fail with this option, but it's still useful to see how connections
232  // are treated. jhrg 10/2/18
233  if (CURLE_OK != (res = curl_easy_setopt(d_handle, CURLOPT_VERBOSE, 1L)))
234  throw BESInternalError(string("CURL Error: ").append(curl_error_msg(res, d_errbuf)), __FILE__, __LINE__);
235 #endif
236 
237  // Pass all data to the 'write_data' function
238  if (CURLE_OK != (res = curl_easy_setopt(d_handle, CURLOPT_WRITEFUNCTION, chunk_write_data)))
239  throw BESInternalError(string("CURL Error: ").append(curl_error_msg(res, d_errbuf)), __FILE__, __LINE__);
240 
241 #ifdef CURLOPT_TCP_KEEPALIVE
242  /* enable TCP keep-alive for this transfer */
243  if (CURLE_OK != (res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPALIVE, 1L)))
244  throw BESInternalError(string("CURL Error: ").append(curl_error_msg(res)), __FILE__, __LINE__);
245 #endif
246 
247 #ifdef CURLOPT_TCP_KEEPIDLE
248  /* keep-alive idle time to 120 seconds */
249  if (CURLE_OK != (res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPIDLE, 120L)))
250  throw BESInternalError(string("CURL Error: ").append(curl_error_msg(res)), __FILE__, __LINE__);
251 #endif
252 
253 #ifdef CURLOPT_TCP_KEEPINTVL
254  /* interval time between keep-alive probes: 120 seconds */
255  if (CURLE_OK != (res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPINTVL, 120L)))
256  throw BESInternalError(string("CURL Error: ").append(curl_error_msg(res)), __FILE__, __LINE__)
257 #endif
258 
259  d_in_use = false;
260  d_url = "";
261  d_chunk = 0;
262 }
263 
264 dmrpp_easy_handle::~dmrpp_easy_handle()
265 {
266  if (d_handle) curl_easy_cleanup(d_handle);
267  if (d_headers) curl_slist_free_all(d_headers);
268 }
269 
278 static bool evaluate_curl_response(CURL* eh)
279 {
280  long http_code = 0;
281  CURLcode res = curl_easy_getinfo(eh, CURLINFO_RESPONSE_CODE, &http_code);
282  if (CURLE_OK != res) {
283  throw BESInternalError(string("Error getting HTTP response code: ").append(curl_error_msg(res, (char*)"")), __FILE__, __LINE__);
284  }
285 
286  // Newer Apache servers return 206 for range requests. jhrg 8/8/18
287  switch (http_code) {
288  case 200: // OK
289  case 206: // Partial content - this is to be expected since we use range gets
290  // cases 201-205 are things we should probably reject, unless we add more
291  // comprehensive HTTP/S processing here. jhrg 8/8/18
292  return true;
293 
294  case 500: // Internal server error
295  case 503: // Service Unavailable
296  case 504: // Gateway Timeout
297  return false;
298 
299  default: {
300  ostringstream oss;
301  oss << "HTTP status error: Expected an OK status, but got: ";
302  oss << http_code;
303  throw BESInternalError(oss.str(), __FILE__, __LINE__);
304  }
305  }
306 }
307 
314 {
315  // Treat HTTP/S requests specially; retry some kinds of failures.
316  if (d_url.find("https://") == 0 || d_url.find("http://") == 0) {
317  unsigned int tries = 0;
318  bool success = true;
319  unsigned int retry_time = initial_retry_time;
320 
321  // Perform the request
322  do {
323  CURLcode curl_code = curl_easy_perform(d_handle);
324  ++tries;
325 
326  if (CURLE_OK != curl_code) {
327  throw BESInternalError(string("Data transfer error: ").append(curl_error_msg(curl_code, d_errbuf)),
328  __FILE__, __LINE__);
329  }
330 
331  success = evaluate_curl_response(d_handle);
332 
333  if (!success) {
334  if (tries == retry_limit) {
335  throw BESInternalError(
336  string("Data transfer error: Number of re-tries to S3 exceeded: ").append(
337  curl_error_msg(curl_code, d_errbuf)), __FILE__, __LINE__);
338  }
339  else {
340  LOG("HTTP transfer 500 error, will retry (trial " << tries << " for: " << d_url << ").");
341  usleep(retry_time);
342  retry_time *= 2;
343  }
344  }
345 
346  curl_slist_free_all(d_headers);
347  d_headers = 0;
348  } while (!success);
349  }
350  else {
351  CURLcode curl_code = curl_easy_perform(d_handle);
352  if (CURLE_OK != curl_code) {
353  throw BESInternalError(string("Data transfer error: ").append(curl_error_msg(curl_code, d_errbuf)),
354  __FILE__, __LINE__);
355  }
356  }
357 
358  d_chunk->set_is_read(true);
359 }
360 
368 struct dmrpp_multi_handle::multi_handle {
369 #if HAVE_CURL_MULTI_API
370  CURLM *curlm;
371 #else
372  std::vector<dmrpp_easy_handle *> ehandles;
373 #endif
374 };
375 
376 dmrpp_multi_handle::dmrpp_multi_handle()
377 {
378  p_impl = new multi_handle;
379 #if HAVE_CURL_MULTI_API
380  p_impl->curlm = curl_multi_init();
381 #endif
382 }
383 
384 dmrpp_multi_handle::~dmrpp_multi_handle()
385 {
386 #if HAVE_CURL_MULTI_API
387  curl_multi_cleanup(p_impl->curlm);
388 #endif
389  delete p_impl;
390 }
391 
401 {
402 #if HAVE_CURL_MULTI_API
403  curl_multi_add_handle(p_impl->curlm, eh->d_handle);
404 #else
405  p_impl->ehandles.push_back(eh);
406 #endif
407 }
408 
409 // This is only used if we don't have the Multi API and have to use pthreads.
410 // jhrg 8/27/18
411 #if !HAVE_CURL_MULTI_API
412 static void *easy_handle_read_data(void *handle)
413 {
414  dmrpp_easy_handle *eh = reinterpret_cast<dmrpp_easy_handle*>(handle);
415 
416  try {
417  eh->read_data();
418  pthread_exit(0);
419  }
420  catch (BESError &e) {
421  string *error = new string(e.get_verbose_message());
422  pthread_exit(error);
423  }
424 }
425 #endif
426 
440 {
441 #if HAVE_CURL_MULTI_API
442  // Use the libcurl Multi API here. Alternate version follows...
443 
444  int still_running = 0;
445  CURLMcode mres = curl_multi_perform(p_impl->curlm, &still_running);
446  if (mres != CURLM_OK)
447  throw BESInternalError(string("Could not initiate data read: ").append(curl_multi_strerror(mres)), __FILE__,
448  __LINE__);
449 
450  do {
451  int numfds = 0;
452  mres = curl_multi_wait(p_impl->curlm, NULL, 0, MAX_WAIT_MSECS, &numfds);
453  if (mres != CURLM_OK)
454  throw BESInternalError(string("Could not wait on data read: ").append(curl_multi_strerror(mres)), __FILE__,
455  __LINE__);
456 
457  mres = curl_multi_perform(p_impl->curlm, &still_running);
458  if (mres != CURLM_OK)
459  throw BESInternalError(string("Could not iterate data read: ").append(curl_multi_strerror(mres)), __FILE__,
460  __LINE__);
461 
462  } while (still_running);
463 
464  CURLMsg *msg = 0;
465  int msgs_left = 0;
466  while ((msg = curl_multi_info_read(p_impl->curlm, &msgs_left))) {
467  if (msg->msg == CURLMSG_DONE) {
468  CURL *eh = msg->easy_handle;
469 
470  CURLcode res = msg->data.result;
471  if (res != CURLE_OK)
472  throw BESInternalError(string("Error HTTP: ").append(curl_easy_strerror(res)), __FILE__, __LINE__);
473 
474  // Note: 'eh' is the easy handle returned by culr_multi_info_read(),
475  // but in it's private field is our dmrpp_easy_handle object. We need
476  // both to mark this data read operation as complete.
478  res = curl_easy_getinfo(eh, CURLINFO_PRIVATE, &dmrpp_easy_handle);
479  if (res != CURLE_OK)
480  throw BESInternalError(string("Could not access easy handle: ").append(curl_easy_strerror(res)), __FILE__, __LINE__);
481 
482  // This code has to work with both http/s: and file: protocols. Here we check the
483  // HTTP status code. If the protocol is not HTTP, we assume since msg->data.result
484  // returned CURLE_OK, that the transfer worked. jhrg 5/1/18
485  if (dmrpp_easy_handle->d_url.find("http://") == 0 || dmrpp_easy_handle->d_url.find("https://") == 0) {
486  evaluate_curl_response(eh);
487  }
488 
489  // If we are here, the request was successful.
490  dmrpp_easy_handle->d_chunk->set_is_read(true); // Set the is_read() property for chunk here.
491 
492  // NB: Remove the handle from the CURLM* and _then_ call release_handle()
493  // so that the KEEP_ALIVE 0 (off) works. Calling delete on the dmrpp_easy_handle
494  // will invalidate 'eh', so call that after removing 'eh'.
495  mres = curl_multi_remove_handle(p_impl->curlm, eh);
496  if (mres != CURLM_OK)
497  throw BESInternalError(string("Could not remove libcurl handle: ").append(curl_multi_strerror(mres)), __FILE__, __LINE__);
498 
499  DmrppRequestHandler::curl_handle_pool->release_handle(dmrpp_easy_handle);
500  }
501  else { // != CURLMSG_DONE
502  throw BESInternalError("Error getting HTTP or FILE responses.", __FILE__, __LINE__);
503  }
504  }
505 #else
506  // Start the processing pipelines using pthreads - there is no Multi API
507 
508  pthread_t threads[p_impl->ehandles.size()];
509  unsigned int num_threads = 0;
510  try {
511  for (unsigned int i = 0; i < p_impl->ehandles.size(); ++i) {
512  int status = pthread_create(&threads[i], NULL, easy_handle_read_data, (void*) p_impl->ehandles[i]);
513  if (status == 0) {
514  ++num_threads;
515  }
516  else {
517  ostringstream oss("Could not start process_one_chunk_unconstrained thread for chunk ", std::ios::ate);
518  oss << i << ": " << strerror(status);
519  throw BESInternalError(oss.str(), __FILE__, __LINE__);
520  }
521  }
522 
523  // Now join the child threads.
524  for (unsigned int i = 0; i < num_threads; ++i) {
525  string *error;
526  int status = pthread_join(threads[i], (void**) &error);
527  if (status != 0) {
528  ostringstream oss("Could not join process_one_chunk_unconstrained thread for chunk ", std::ios::ate);
529  oss << i << ": " << strerror(status);
530  throw BESInternalError(oss.str(), __FILE__, __LINE__);
531  }
532  else if (error != 0) {
533  BESInternalError e(*error, __FILE__, __LINE__);
534  delete error;
535  throw e;
536  }
537  }
538  }
539  catch(...) {
540  join_threads(threads, num_threads);
541  throw;
542  }
543 
544  // Now remove the easy_handles, mimicking the behavior when using the real Multi API
545  p_impl->ehandles.clear();
546 #endif
547 }
548 
549 CurlHandlePool::CurlHandlePool() : d_multi_handle(0)
550 {
551  d_max_easy_handles = DmrppRequestHandler::d_max_parallel_transfers;
552  d_multi_handle = new dmrpp_multi_handle();
553 
554  for (unsigned int i = 0; i < d_max_easy_handles; ++i) {
555  d_easy_handles.push_back(new dmrpp_easy_handle());
556  }
557 
558  if (pthread_mutex_init(&d_get_easy_handle_mutex, 0) != 0)
559  throw BESInternalError("Could not initialize mutex in CurlHandlePool", __FILE__, __LINE__);
560 }
561 
573 static struct curl_slist *
574 append_http_header(curl_slist *slist, const string &header, const string &value)
575 {
576  string full_header = header;
577  full_header.append(" ").append(value);
578 
579  struct curl_slist *temp = curl_slist_append(slist, full_header.c_str());
580  return temp;
581 }
582 
583 #if 0
584 // TODO Make this real! jhrg 11/26/19
/**
 * @brief Placeholder test for whether a URL has credentials: true when the
 * URL contains the text "cloudyopendap".
 */
static bool
url_has_credentials(const std::string &url)
{
    const std::string::size_type where = url.find("cloudyopendap");
    return where != std::string::npos;
}
590 
591 static bool
592 url_must_be_signed(const string &url)
593 {
594 
595  if(url.find("http://") == 0 || url.find("https://") == 0){
596  AccessCredentials *ac = CredentialsManager::theCM()->get(url);
597  if(ac)
598  return ac->isS3Cred();
599  }
600  return false;
601  // return (url.find("http://") == 0 || url.find("https://") == 0) && url_has_credentials(url);
602 }
603 
604 //I think this is closer to working now and that we don't need these functions - ndp 12/12/19'
605 
606 // FIXME The most low-budget credential DB on the planet. jhrg 11/26/19
/**
 * Simple holder for one set of AWS S3 credentials (disabled code).
 *
 * Copying uses the compiler-generated operations so that every member,
 * including the credentials map, is copied.
 */
struct aws_credentials {
    std::string public_key;   // AWS access key id
    std::string secret_key;   // AWS secret access key - never log this
    std::string region;       // AWS region, e.g. "us-east-1"
    std::string bucket_name;  // S3 bucket name

    std::map<std::string, std::map<std::string, std::string>> credentials;

    /// Build an empty set of credentials.
    aws_credentials() {}

    /// Build credentials from the key id, secret key, region and bucket name.
    aws_credentials(const std::string &p_key, const std::string &s_key, const std::string &r, const std::string &b)
        : public_key(p_key), secret_key(s_key), region(r), bucket_name(b) {}

    /// Look up the credentials to use for a URL.
    std::unique_ptr<aws_credentials> get(const std::string &url);
};
626 
627 void get_from_env(const string &key, string &value){
628  const char *cstr = getenv(key.c_str());
629  if(cstr){
630  value.assign(cstr);
631  BESDEBUG(MODULE, __FILE__ << " " << __LINE__ << " From system environment - " << key << ": " << value << endl);
632  }
633  else {
634  value.clear();
635  }
636 }
637 
638 void get_from_config(const string &key, string &value){
639  bool key_found=false;
640  TheBESKeys::TheKeys()->get_value(key, value, key_found);
641  if (key_found) {
642  BESDEBUG(MODULE, __FILE__ << " " << __LINE__ << " Using " << key << " from TheBESKeys" << endl);
643  }
644  else {
645  value.clear();
646  }
647 }
648 
649 void get_creds_from_local(string &aws_akid, string &aws_sak, string &aws_region, string &aws_s3_bucket ){
650 
651  const string KEYS_CONFIG_PREFIX("DMRPP");
652 
653  const string ENV_AKID_KEY("AWS_ACCESS_KEY_ID");
654  const string CONFIG_AKID_KEY(KEYS_CONFIG_PREFIX+"."+ENV_AKID_KEY);
655 
656  const string ENV_SAK_KEY("AWS_SECRET_ACCESS_KEY");
657  const string CONFIG_SAK_KEY(KEYS_CONFIG_PREFIX+"."+ENV_SAK_KEY);
658 
659  const string ENV_REGION_KEY("AWS_REGION");
660  const string CONFIG_REGION_KEY(KEYS_CONFIG_PREFIX+"."+ENV_REGION_KEY);
661 
662  const string ENV_S3_BUCKET_KEY("AWS_S3_BUCKET");
663  const string CONFIG_S3_BUCKET_KEY(KEYS_CONFIG_PREFIX+"."+ENV_S3_BUCKET_KEY);
664 
665 #ifndef NDEBUG
666 
667  // If we are in developer mode then we compile this section which
668  // allows us to inject credentials via the system environment
669 
670  get_from_env(ENV_AKID_KEY,aws_akid);
671  get_from_env(ENV_SAK_KEY,aws_sak);
672  get_from_env(ENV_REGION_KEY,aws_region);
673  get_from_env(ENV_S3_BUCKET_KEY,aws_s3_bucket);
674 
675  BESDEBUG(MODULE, __FILE__ << " " << __LINE__
676  << " From ENV aws_akid: '" << aws_akid << "' "
677  << "aws_sak: '" << aws_sak << "' "
678  << "aws_region: '" << aws_region << "' "
679  << "aws_s3_bucket: '" << aws_s3_bucket << "' "
680  << endl);
681 
682 #endif
683 
684  // In production mode this is the single point of ingest for credentials.
685  // Developer mode enables the piece above which allows the environment to
686  // overrule the configuration
687 
688  if(aws_akid.length()){
689  BESDEBUG(MODULE, __FILE__ << " " << __LINE__ << " Using " << ENV_AKID_KEY << " from the environment." << endl);
690  }
691  else {
692  get_from_config(CONFIG_AKID_KEY,aws_akid);
693  }
694 
695  if(aws_sak.length()){
696  BESDEBUG(MODULE, __FILE__ << " " << __LINE__ << " Using " << ENV_SAK_KEY << " from the environment." << endl);
697  }
698  else {
699  get_from_config(CONFIG_SAK_KEY,aws_sak);
700  }
701 
702  if(aws_region.length()){
703  BESDEBUG(MODULE, __FILE__ << " " << __LINE__ << " Using " << ENV_REGION_KEY << " from the environment." << endl);
704  }
705  else {
706  get_from_config(CONFIG_REGION_KEY,aws_region);
707  }
708 
709  if(aws_s3_bucket.length()){
710  BESDEBUG(MODULE, __FILE__ << " " << __LINE__ << " Using " << ENV_S3_BUCKET_KEY << " from the environment." << endl);
711  }
712  else {
713  get_from_config(CONFIG_S3_BUCKET_KEY,aws_s3_bucket);
714  }
715 
716  BESDEBUG(MODULE, __FILE__ << " " << __LINE__
717  << " END aws_akid: '" << aws_akid << "' "
718  << "aws_sak: '" << aws_sak << "' "
719  << "aws_region: '" << aws_region << "' "
720  << "aws_s3_bucket: '" << aws_s3_bucket << "' "
721  << endl);
722 }
723 
724 unique_ptr<aws_credentials>
725 aws_credentials::get(const string &url)
726 {
727  // FIXME Lookup the credentials in some db (BES Keys?). jhrg 11/26/19
728 
729  string aws_akid;
730  string aws_sak;
731  string aws_region;
732  string aws_s3_bucket;
733 
734  if (url.find("cloudyopendap") != string::npos) {
735 
736  get_creds_from_local(aws_akid, aws_sak, aws_region, aws_s3_bucket);
737  BESDEBUG(MODULE, __FILE__ << " " << __LINE__
738  << " aws_akid: " << aws_akid
739  << " aws_sak: " << aws_sak
740  << " aws_region: " << aws_region
741  << " aws_s3_bucket: " << aws_s3_bucket
742  << endl);
743 
744  unique_ptr<aws_credentials> creds(new aws_credentials(aws_akid, aws_sak, aws_region, aws_s3_bucket));
745  return creds;
746  } else {
747  unique_ptr<aws_credentials> creds(new aws_credentials( "", "", "", ""));
748  return creds;
749  }
750 }
751 #endif
752 
769 {
770  // Here we check to make sure that the we are only going to
771  // access an approved location with this easy_handle
772  if(!WhiteList::get_white_list()->is_white_listed(chunk->get_data_url())){
773  string msg = "ERROR!! The chunk url " + chunk->get_data_url() + " does not match any white-list rule. ";
774  throw BESForbiddenError(msg ,__FILE__,__LINE__);
775  }
776 
777  Lock lock(d_get_easy_handle_mutex); // RAII
778 
779  dmrpp_easy_handle *handle = 0;
780  for (vector<dmrpp_easy_handle *>::iterator i = d_easy_handles.begin(), e = d_easy_handles.end(); i != e; ++i) {
781  if (!(*i)->d_in_use) {
782  handle = *i;
783  break;
784  }
785  }
786 
787  if (handle) {
788  // Once here, d_easy_handle holds a CURL* we can use.
789  handle->d_in_use = true;
790  handle->d_url = chunk->get_data_url();
791 
792  handle->d_chunk = chunk;
793 
794  CURLcode res = curl_easy_setopt(handle->d_handle, CURLOPT_URL, chunk->get_data_url().c_str());
795  if (res != CURLE_OK) throw BESInternalError(string("HTTP Error setting URL: ").append(curl_error_msg(res, handle->d_errbuf)), __FILE__, __LINE__);
796 
797  // get the offset to offset + size bytes
798  if (CURLE_OK != (res = curl_easy_setopt(handle->d_handle, CURLOPT_RANGE, chunk->get_curl_range_arg_string().c_str())))
799  throw BESInternalError(string("HTTP Error setting Range: ").append(curl_error_msg(res, handle->d_errbuf)), __FILE__,
800  __LINE__);
801 
802  // Pass this to write_data as the fourth argument
803  if (CURLE_OK != (res = curl_easy_setopt(handle->d_handle, CURLOPT_WRITEDATA, reinterpret_cast<void*>(chunk))))
804  throw BESInternalError(string("CURL Error setting chunk as data buffer: ").append(curl_error_msg(res, handle->d_errbuf)),
805  __FILE__, __LINE__);
806 
807  // store the easy_handle so that we can call release_handle in multi_handle::read_data()
808  if (CURLE_OK != (res = curl_easy_setopt(handle->d_handle, CURLOPT_PRIVATE, reinterpret_cast<void*>(handle))))
809  throw BESInternalError(string("CURL Error setting easy_handle as private data: ").append(curl_error_msg(res, handle->d_errbuf)), __FILE__,
810  __LINE__);
811 
812  AccessCredentials *credentials = CredentialsManager::theCM()->get(handle->d_url);
813  if ( credentials && credentials->isS3Cred()) {
814  BESDEBUG(MODULE, "Got AccessCredentials instance: "<< endl << credentials->to_json() << endl );
815  // If there are available credentials, and they are S3 credentials then we need to sign
816  // the request
817  const std::time_t request_time = std::time(0);
818 
819  const std::string auth_header =
820  AWSV4::compute_awsv4_signature(
821  handle->d_url,
822  request_time,
823  credentials->get(AccessCredentials::ID_KEY),
824  credentials->get(AccessCredentials::KEY_KEY),
825  credentials->get(AccessCredentials::REGION_KEY),
826  "s3",
827  BESDebug::IsSet(MODULE));
828 
829  // passing nullptr for the first call allocates the curl_slist
830  // The following code builds the slist that holds the headers. This slist is freed
831  // once the URL is dereferenced in dmrpp_easy_handle::read_data(). jhrg 11/26/19
832  handle->d_headers = append_http_header(0, "Authorization:", auth_header);
833  if (!handle->d_headers)
834  throw BESInternalError(
835  string("CURL Error setting Authorization header: ").append(
836  curl_error_msg(res, handle->d_errbuf)), __FILE__, __LINE__);
837 
838  // We pre-compute the sha256 hash of a null message body
839  curl_slist *temp = append_http_header(handle->d_headers, "x-amz-content-sha256:", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
840  if (!temp)
841  throw BESInternalError(
842  string("CURL Error setting x-amz-content-sha256: ").append(curl_error_msg(res, handle->d_errbuf)),
843  __FILE__, __LINE__);
844  handle->d_headers = temp;
845 
846  temp = append_http_header(handle->d_headers, "x-amz-date:", AWSV4::ISO8601_date(request_time));
847  if (!temp)
848  throw BESInternalError(
849  string("CURL Error setting x-amz-date header: ").append(curl_error_msg(res, handle->d_errbuf)),
850  __FILE__, __LINE__);
851  handle->d_headers = temp;
852 
853 
854  if (CURLE_OK != (res = curl_easy_setopt(handle->d_handle, CURLOPT_HTTPHEADER, handle->d_headers)))
855  throw BESInternalError(string("CURL Error setting HTTP headers for S3 authentication: ").append(
856  curl_error_msg(res, handle->d_errbuf)), __FILE__, __LINE__);
857  }
858  }
859 
860  return handle;
861 }
862 
870 {
871  // In get_easy_handle, it's possible that d_in_use could be false and d_chunk
872  // could not be set to 0 (because a separate thread could be running these
873  // methods). In that case, the thread running get_easy_handle could set d_chunk,
874  // and then this thread could clear it (... unlikely, but an optimizing compiler is
875  // free to reorder statements so long as they don't alter the function's behavior).
876  // Timing tests indicate this lock does not cost anything that can be measured.
877  // jhrg 8/21/18
878  Lock lock(d_get_easy_handle_mutex);
879 
880 #if KEEP_ALIVE
881  handle->d_url = "";
882  handle->d_chunk = 0;
883  handle->d_in_use = false;
884 #else
885  // This is to test the effect of libcurl Keep Alive support
886  // Find the handle; erase from the vector; delete; allocate a new handle and push it back on
887  for (std::vector<dmrpp_easy_handle *>::iterator i = d_easy_handles.begin(), e = d_easy_handles.end(); i != e; ++i) {
888  if (*i == handle) {
889  BESDEBUG("dmrpp:5", "Found a handle match for the " << i - d_easy_handles.begin() << "th easy handle." << endl);
890  delete handle;
891  *i = new dmrpp_easy_handle();
892  break;
893  }
894  }
895 #endif
896 }
AccessCredentials::get
std::string get(const std::string &key)
Definition: CredentialsManager.cc:418
dmrpp::Chunk
Definition: Chunk.h:43
dmrpp::dmrpp_easy_handle
Bundle a libcurl easy handle to other information.
Definition: CurlHandlePool.h:61
dmrpp::dmrpp_easy_handle::read_data
void read_data()
This is the read_data() method for serial transfers.
Definition: CurlHandlePool.cc:313
dmrpp::dmrpp_easy_handle::dmrpp_easy_handle
dmrpp_easy_handle()
Build a string with hex info about stuff libcurl gets.
Definition: CurlHandlePool.cc:218
dmrpp::dmrpp_multi_handle::read_data
void read_data()
The read_data() method for parallel transfers.
Definition: CurlHandlePool.cc:439
TheBESKeys::TheKeys
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:62
BESDebug::IsSet
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:157
dmrpp::dmrpp_multi_handle
Encapsulate a libcurl multi handle.
Definition: CurlHandlePool.h:83
BESForbiddenError
error thrown if the BES is not allowed to access the resource requested
Definition: BESForbiddenError.h:40
BESInternalError
exception thrown if internal error encountered
Definition: BESInternalError.h:43
TheBESKeys::get_value
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:272
dmrpp::dmrpp_multi_handle::add_easy_handle
void add_easy_handle(dmrpp_easy_handle *eh)
Add an Easy Handle to a Multi Handle object.
Definition: CurlHandlePool.cc:400
dmrpp::Chunk::get_curl_range_arg_string
virtual std::string get_curl_range_arg_string()
Returns a curl range argument. The libcurl requires a string argument for range-ge activitys,...
Definition: Chunk.cc:383
CredentialsManager::get
AccessCredentials * get(const std::string &url)
Definition: CredentialsManager.cc:166
dmrpp::Chunk::get_data_url
virtual std::string get_data_url() const
Get the data url string for this Chunk's data block.
Definition: Chunk.h:177
dmrpp::CurlHandlePool::get_easy_handle
dmrpp_easy_handle * get_easy_handle(Chunk *chunk)
Definition: CurlHandlePool.cc:768
dmrpp::Lock
Definition: CurlHandlePool.h:41
AccessCredentials
Definition: CredentialsManager.h:36
BESError
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
AccessCredentials::isS3Cred
bool isS3Cred()
Definition: CredentialsManager.cc:443
dmrpp::CurlHandlePool::release_handle
void release_handle(dmrpp_easy_handle *h)
Definition: CurlHandlePool.cc:869