38 #include <BESInternalError.h>
40 #include "curl_utils.h"
41 #include "HttpdCatalogNames.h"
42 #include "HttpdCatalogUtils.h"
43 #include "RemoteHttpResource.h"
44 #include "RemoteHttpResourceCache.h"
// Log-message prefix used throughout this file. It expands to a string of the
// form RemoteHttpResource::<function>() - where <function> comes from __func__
// at the point of use.
48 #define prolog string("RemoteHttpResource::").append(__func__).append("() - ")
// Opens the httpd_catalog namespace (its closing brace is not visible in this excerpt).
50 namespace httpd_catalog {
/**
 * Constructs a RemoteHttpResource for the given URL.
 *
 * Visible steps: marks the object as not initialized, clears the cache file
 * name, allocates the request/response header containers, stores the
 * (possibly trimmed) URL in d_remoteResourceUrl, obtains a curl handle from
 * init() and passes it to configureProxy().
 *
 * NOTE(review): this excerpt omits several lines of the constructor (braces
 * and at least one conditional); confirm the control flow against the full
 * source.
 *
 * @param const_url URL of the remote resource. It is copied into a local
 *                  string so that it can be modified before being stored.
 */
57 RemoteHttpResource::RemoteHttpResource(
const string &const_url)
59 d_initialized =
false;
62 d_resourceCacheFileName.clear();
// The three header containers below are allocated with new and held through
// raw pointers.
63 d_response_headers =
new vector<string>();
64 d_request_headers =
new vector<string>();
65 d_http_response_headers =
new map<string, string>();
67 BESDEBUG(MODULE, prolog <<
"Passed url: " << const_url << endl);
69 string url = const_url;
// NOTE(review): the statement that consumes this message is not visible in
// this excerpt; presumably it guards against an empty URL. Confirm.
71 string err =
"RemoteHttpResource(): Remote resource URL is empty";
// When the URL begins with the file:// scheme and ends with a slash, drop the
// trailing slash. The rbegin() dereference is evaluated only after the prefix
// test has succeeded (short-circuit &&), so url is non-empty at that point.
75 size_t file_index = url.find(
"file://");
76 if( file_index!=url.npos && file_index==0 && *url.rbegin()==
'/'){
77 url = url.substr(0,url.length()-1);
80 d_remoteResourceUrl = url;
82 BESDEBUG(MODULE, prolog <<
"URL: " << d_remoteResourceUrl << endl);
// The value returned by init() is stored as the curl handle; init() is given
// d_error_buffer (presumably so curl can report errors into it; confirm).
91 d_curl = init(d_error_buffer);
93 configureProxy(d_curl, d_remoteResourceUrl);
95 BESDEBUG(MODULE, prolog <<
"d_curl: " << d_curl << endl);
/**
 * Destructor. Deletes the request and response header vectors, clears the
 * cache file name and the resource URL, and calls curl_easy_cleanup() on
 * d_curl.
 *
 * NOTE(review): d_http_response_headers is allocated with new in the
 * constructor, but no matching delete is visible in this excerpt. Confirm
 * that it is released somewhere, otherwise it leaks.
 */
97 RemoteHttpResource::~RemoteHttpResource()
103 BESDEBUG(MODULE, prolog <<
"BEGIN resourceURL: " << d_remoteResourceUrl << endl);
105 delete d_response_headers;
106 d_response_headers = 0;
107 BESDEBUG(MODULE, prolog <<
"Deleted d_response_headers." << endl);
109 delete d_request_headers;
110 d_request_headers = 0;
111 BESDEBUG(MODULE, prolog <<
"Deleted d_request_headers." << endl);
// Only acts when a cache file name was recorded. The statements between this
// test and the log line are not visible here; going by the log text they
// presumably close and unlock the cache file. Confirm.
113 if (!d_resourceCacheFileName.empty()) {
117 BESDEBUG(MODULE, prolog <<
"Closed and unlocked "<< d_resourceCacheFileName << endl);
118 d_resourceCacheFileName.clear();
// Releases the curl handle obtained in the constructor.
// NOTE(review): any guard around this call is not visible in this excerpt.
123 curl_easy_cleanup(d_curl);
124 BESDEBUG(MODULE, prolog <<
"Called curl_easy_cleanup()." << endl);
128 BESDEBUG(MODULE, prolog <<
"END resourceURL: " << d_remoteResourceUrl << endl);
129 d_remoteResourceUrl.clear();
/**
 * Makes the remote resource available through the local cache and marks this
 * object as initialized.
 *
 * From the visible statements: when the resource is already cached, the saved
 * response headers are read back from a header file and re-ingested;
 * otherwise the resource is fetched with writeResourceToFile() and the
 * response headers are written to a header file, one per line.
 *
 * NOTE(review): this excerpt omits most of the control flow of the function
 * (braces, branch conditions, cache and lock calls, declarations such as
 * hdr_filename and oss). The comments below describe only what the visible
 * statements do; confirm the surrounding logic against the full source.
 *
 * @throws libdap::Error with a message about failing to acquire the cache
 *         read lock (see the visible throw below).
 */
139 void RemoteHttpResource::retrieveResource()
141 BESDEBUG(MODULE, prolog <<
"BEGIN resourceURL: " << d_remoteResourceUrl << endl);
// Logged on what is presumably the early-exit path for an object that is
// already initialized (the test itself is not visible here).
144 BESDEBUG(MODULE, prolog <<
"END Already initialized." << endl);
// Message reporting that no local cache could be obtained.
152 oss << __func__ <<
"() - FAILED to get local cache."
153 " Unable to proceed with request for " << this->d_remoteResourceUrl <<
" The httpd_catalog MUST have a valid cache configuration to operate."
155 BESDEBUG(MODULE, oss.str());
162 BESDEBUG(MODULE, prolog <<
"d_resourceCacheFileName: " << d_resourceCacheFileName << endl);
// Type lookup based on the URL; d_type is passed in and logged right after,
// so it is presumably an out-parameter. Confirm against HttpdCatalogUtils.
170 HttpdCatalogUtils::get_type_from_url(d_remoteResourceUrl, d_type);
171 BESDEBUG(MODULE, prolog <<
"d_type: " << d_type << endl);
175 BESDEBUG(MODULE, prolog <<
"Remote resource is already in cache. cache_file_name: " << d_resourceCacheFileName << endl);
// Read previously saved response headers back in: each line of the file named
// by hdr_filename becomes one entry in d_response_headers.
180 ifstream hdr_ifs(hdr_filename.c_str());
182 BESDEBUG(MODULE, prolog <<
"Reading response headers from: " << hdr_filename << endl);
183 for (
string line; getline(hdr_ifs, line);) {
184 (*d_response_headers).push_back(line);
185 BESDEBUG(MODULE, prolog <<
"header: " << line << endl);
// Rebuild the header map and the type from the headers just read.
192 ingest_http_headers_and_type();
193 d_initialized =
true;
// Fetch the remote resource using the descriptor d_fd.
205 writeResourceToFile(d_fd);
// Removes the cache file; presumably on a failure path (the surrounding
// condition is not visible here). Confirm.
209 unlink(d_resourceCacheFileName.c_str());
// Save the response headers, one per line, to the file named by hdr_filename.
218 ofstream hdr_out(hdr_filename.c_str());
220 for (
size_t i = 0; i < this->d_response_headers->size(); i++) {
221 hdr_out << (*d_response_headers)[i] << endl;
// Removes both the header file and the cache file; presumably cleanup after a
// failure (the surrounding condition is not visible here). Confirm.
227 unlink(hdr_filename.c_str());
228 unlink(d_resourceCacheFileName.c_str());
238 BESDEBUG(MODULE, prolog <<
"Converted exclusive cache lock to shared lock." << endl);
244 BESDEBUG(MODULE, prolog <<
"Updated cache info" << endl);
248 BESDEBUG(MODULE, prolog <<
"Updated and purged cache." << endl);
250 BESDEBUG(MODULE, prolog <<
"END" << endl);
251 d_initialized =
true;
256 BESDEBUG(MODULE, prolog <<
"Remote resource is in cache. cache_file_name: " << d_resourceCacheFileName << endl);
257 d_initialized =
true;
// NOTE(review): the message opens a single quote before the URL, but the text
// appended after the URL is only a newline, so the quote is never closed.
262 string msg = prolog +
"Failed to acquire cache read lock for remote resource: '";
263 msg += d_remoteResourceUrl +
"\n";
266 throw libdap::Error(msg);
// This log line spells out the function name instead of using prolog.
270 BESDEBUG(MODULE,
"RemoteHttpResource::retrieveResource() - Caught exception, unlocking cache and re-throw." << endl);
/**
 * Fetches d_remoteResourceUrl with read_url() using the given file
 * descriptor, rewinds the descriptor to offset 0, and ingests the response
 * headers.
 *
 * NOTE(review): this excerpt omits parts of the function (the try block, the
 * status test, how the error text in oss is raised, the body of the catch
 * handler). Confirm the control flow against the full source.
 *
 * @param fd File descriptor handed to read_url() (presumably the destination
 *           for the response body, going by the log text) and rewound with
 *           lseek() afterwards.
 * @throws BESError with BES_NOT_FOUND_ERROR when lseek() returns -1.
 */
284 void RemoteHttpResource::writeResourceToFile(
int fd)
286 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
295 "RemoteHttpResource::writeResourceToFile() - Saving resource " << d_remoteResourceUrl <<
" to cache file " << d_resourceCacheFileName << endl);
// read_url() is given the curl handle, the URL, the descriptor, both header
// vectors and the error buffer; its return value is kept in status.
297 status = read_url(d_curl, d_remoteResourceUrl, fd, d_response_headers, d_request_headers, d_error_buffer);
300 BESDEBUG(MODULE, prolog <<
"HTTP returned an error status: " << status << endl);
// Error text for the failing status, including its textual meaning from
// http_status_to_string(). How it is raised is not visible in this excerpt.
302 oss <<
"Error while reading the URL: '";
303 oss << d_remoteResourceUrl;
304 oss <<
"' The HTTP request returned a status of " << status <<
" which means '";
305 oss << http_status_to_string(status) <<
"' \n";
309 BESDEBUG(MODULE, prolog <<
"Resource " << d_remoteResourceUrl <<
" saved to cache file " << d_resourceCacheFileName << endl);
// Move the file offset back to the start of the file. The status variable is
// reused here to hold the result of lseek().
317 status = lseek(fd, 0, SEEK_SET);
318 if (-1 == status)
throw BESError(
"Could not seek within the response.", BES_NOT_FOUND_ERROR, __FILE__, __LINE__);
320 BESDEBUG(MODULE, prolog <<
"Reset file descriptor." << endl);
322 ingest_http_headers_and_type();
// libdap::Error is caught by reference; the handler body is not visible here.
326 catch (libdap::Error &e) {
331 BESDEBUG(MODULE, prolog <<
"END" << endl);
/**
 * Parses the raw response header lines in d_response_headers into the
 * d_http_response_headers map, then works out the data type. The visible
 * type lookups appear in this order: the content-disposition header, the
 * content-type header (only when no type was found yet), and the URL.
 *
 * NOTE(review): this excerpt omits parts of the function (the definition of
 * key, the declarations of cdisp_hdr, ctype_hdr and type, the condition
 * around the URL lookup, and the assignment to d_type). Confirm against the
 * full source.
 */
334 void RemoteHttpResource::ingest_http_headers_and_type()
336 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
// Each header line is split at the first occurrence of a colon followed by a
// space; the text after that separator becomes the value.
338 const string colon_space =
": ";
339 for (
size_t i = 0; i < this->d_response_headers->size(); i++) {
340 size_t colon_index = (*d_response_headers)[i].find(colon_space);
// NOTE(review): when the separator is absent, colon_index is string::npos and
// colon_index + colon_space.length() wraps around to a small offset; substr()
// then throws std::out_of_range if that offset exceeds the line length. No
// guard is visible in this excerpt. Confirm.
342 string value = (*d_response_headers)[i].substr(colon_index + colon_space.length());
343 BESDEBUG(MODULE, prolog <<
"key: " << key <<
" value: " << value << endl);
// A later header with the same key overwrites the earlier entry.
344 (*d_http_response_headers)[key] = value;
// Header lookups below use lower-case names; presumably key is lower-cased
// when stored (its definition is not visible here). Confirm.
351 map<string, string>::iterator it;
353 it = d_http_response_headers->find(
"content-disposition");
354 if (it != d_http_response_headers->end()) {
355 cdisp_hdr = it->second;
358 it = d_http_response_headers->find(
"content-type");
359 if (it != d_http_response_headers->end()) {
360 ctype_hdr = it->second;
// First source for the type: the content-disposition header, when present.
365 if (!cdisp_hdr.empty()) {
368 HttpdCatalogUtils::get_type_from_disposition(cdisp_hdr, type);
369 BESDEBUG(MODULE, prolog <<
"Evaluated content-disposition '" << cdisp_hdr <<
"' matched type: \"" << type <<
"\"" << endl);
// Second source: the content-type header, consulted only while type is still empty.
376 if (type.empty() && !ctype_hdr.empty()) {
377 HttpdCatalogUtils::get_type_from_content_type(ctype_hdr, type);
378 BESDEBUG(MODULE, prolog <<
"Evaluated content-type '" << ctype_hdr <<
"' matched type \"" << type <<
"\"" << endl);
// Third source: the URL itself (the condition guarding this call is not
// visible in this excerpt).
384 HttpdCatalogUtils::get_type_from_url(d_remoteResourceUrl, type);
385 BESDEBUG(MODULE, prolog <<
"Evaluated url '" << d_remoteResourceUrl <<
"' matched type: \"" << type <<
"\"" << endl);
// Diagnostic for the case where no type could be determined; the visible
// code only logs it.
390 string err = prolog +
"Unable to determine the type of data" +
" returned from '" + d_remoteResourceUrl +
"' Setting type to 'unknown'";
391 BESDEBUG(MODULE, err << endl);
398 BESDEBUG(MODULE, prolog <<
"END (dataset type: "<< d_type <<
")" << endl);
/**
 * Returns a string related to an entry of d_http_response_headers: when the
 * iterator refers to an entry, that entry's mapped value is copied into value.
 *
 * NOTE(review): the lookup that sets the iterator, the declaration of value
 * and the return statement are not visible in this excerpt. Presumably the
 * lookup uses header_name and value is what is returned. Confirm.
 *
 * @param header_name Name of the header of interest (taken by value).
 */
406 string RemoteHttpResource::get_http_response_header(
const string header_name)
409 map<string, string>::iterator it;
411 if (it != d_http_response_headers->end()) value = it->second;