35 #include "BESInternalError.h"
43 #include "curl_utils.h"
44 #include "RemoteHttpResource.h"
// Message prefix used by the debug and error strings in this file. It expands to
// "RemoteHttpResource::<function>() - ", with <function> taken from __func__ at the
// point where the macro is used.
49 #define prolog std::string("RemoteHttpResource::").append(__func__).append("() - ")
// Constructor: records the remote URL, allocates the header containers and sets up
// the curl handle used later to fetch the resource.
//
// @param url  Remote resource URL; it is copied into d_remoteResourceUrl.
//
// NOTE(review): this view of the file is missing lines (the leading numbers are the
// original file's line numbers), so some guards and braces are not visible here.
57 RemoteHttpResource::RemoteHttpResource(
const string &url) {
// The object starts out uninitialized; retrieveResource() sets this flag to true.
58 d_initialized =
false;
// No cache file is associated with the resource yet.
61 d_resourceCacheFileName.clear();
// The three header containers are heap-allocated with new. The destructor deletes
// d_response_headers and d_request_headers.
// NOTE(review): no matching delete for d_http_response_headers is visible in the
// destructor excerpt — confirm it is released somewhere, otherwise it leaks.
62 d_response_headers =
new vector<string>();
63 d_request_headers =
new vector<string>();
64 d_http_response_headers =
new map<string,string>();
// Error text for an empty URL. Presumably this is thrown when url is empty; the
// condition and the throw are not visible in this excerpt — TODO confirm.
67 string err =
"RemoteHttpResource(): Remote resource URL is empty";
71 d_remoteResourceUrl = url;
73 BESDEBUG(MODULE, prolog <<
"URL: " << d_remoteResourceUrl << endl);
// Obtain the curl handle, passing d_error_buffer to init(). The destructor later
// hands d_curl to curl_easy_cleanup().
82 d_curl = init(d_error_buffer);
// Apply proxy configuration to the handle for this URL (see configureProxy()).
84 configureProxy(d_curl, d_remoteResourceUrl);
86 BESDEBUG(MODULE, prolog <<
"d_curl: " << d_curl << endl);
// Destructor: frees the request/response header vectors, deals with the cache file
// (if one was assigned), cleans up the curl handle and clears the stored URL.
//
// NOTE(review): this view of the file is missing lines (the leading numbers are the
// original file's line numbers); braces and some statements are not visible here.
// NOTE(review): the constructor also allocates d_http_response_headers with new, but
// no delete for it is visible in this excerpt — confirm it is not leaked.
88 RemoteHttpResource::~RemoteHttpResource()
94 BESDEBUG(MODULE, prolog <<
"BEGIN resourceURL: " << d_remoteResourceUrl << endl);
// Free the raw response header lines and null the pointer.
96 delete d_response_headers;
97 d_response_headers = 0;
98 BESDEBUG(MODULE, prolog <<
"Deleted d_response_headers." << endl);
// Free the request header lines and null the pointer.
100 delete d_request_headers;
101 d_request_headers = 0;
102 BESDEBUG(MODULE, prolog <<
"Deleted d_request_headers." << endl);
// Only when a cache file name was set: fetch the CmrCache singleton. The debug
// message reports the file as closed and unlocked; the call that does this is not
// visible in this excerpt. The file name is cleared afterwards.
104 if (!d_resourceCacheFileName.empty()) {
105 CmrCache *cache = CmrCache::get_instance();
108 BESDEBUG(MODULE, prolog <<
"Closed and unlocked "<< d_resourceCacheFileName << endl);
109 d_resourceCacheFileName.clear();
// Clean up the curl handle obtained in the constructor.
// NOTE(review): any guard around this call (e.g. a null check) is not visible here.
114 curl_easy_cleanup(d_curl);
115 BESDEBUG(MODULE, prolog <<
"Called curl_easy_cleanup()." << endl);
119 BESDEBUG(MODULE, prolog <<
"END resourceURL: " << d_remoteResourceUrl << endl);
120 d_remoteResourceUrl.clear();
// Makes the remote resource available as a local cache file and marks this object
// initialized. From the visible statements: if the resource is already cached, the
// saved response headers are read back from a header file; otherwise the resource is
// written to the cache file via writeResourceToFile() and the response headers are
// saved next to it.
//
// Throws libdap::Error when the cache read lock cannot be acquired.
//
// NOTE(review): this view of the file is missing lines (the leading numbers are the
// original file's line numbers). The if/else/try/catch structure, the cache locking
// calls and several declarations (e.g. oss, hdr_filename) are not visible, so the
// branch conditions mentioned below are inferred from the debug messages — confirm
// against the full file.
130 void RemoteHttpResource::retrieveResource()
132 BESDEBUG(MODULE, prolog <<
"BEGIN resourceURL: " << d_remoteResourceUrl << endl);
// Early-exit path: per the message, nothing is done when already initialized.
135 BESDEBUG(MODULE, prolog <<
"END Already initialized." << endl);
140 CmrCache *cache = CmrCache::get_instance();
// Failure message built when the local cache could not be obtained; it is sent to
// the debug log. What happens after that (presumably a throw) is not visible.
143 oss << __func__ <<
"() - FAILED to get local cache."
144 " Unable to proceed with request for " << this->d_remoteResourceUrl
145 <<
" The cmr_module MUST have a valid cache configuration to operate." << endl;
146 BESDEBUG(MODULE, oss.str());
153 BESDEBUG(MODULE, prolog <<
"d_resourceCacheFileName: " << d_resourceCacheFileName << endl);
// First guess at the data type, derived from the URL alone; result goes to d_type.
161 CmrUtils::Get_type_from_url(d_remoteResourceUrl, d_type);
162 BESDEBUG(MODULE, prolog <<
"d_type: " << d_type << endl);
// Cached path: the resource is already in the cache.
166 BESDEBUG(MODULE, prolog <<
"Remote resource is already in cache. cache_file_name: " << d_resourceCacheFileName << endl);
// Read the saved response headers back, one header per line, into d_response_headers.
171 std::ifstream hdr_ifs(hdr_filename.c_str());
173 BESDEBUG(MODULE, prolog <<
"Reading response headers from: " << hdr_filename << endl);
174 for (std::string line; std::getline(hdr_ifs, line); ){
175 (*d_response_headers).push_back(line);
176 BESDEBUG(MODULE, prolog <<
"header: " << line << endl);
// Parse the headers that were just read and settle the data type, then mark ready.
183 ingest_http_headers_and_type();
184 d_initialized =
true;
// Fetch path: write the remote resource into the cache file referred to by d_fd.
196 writeResourceToFile(d_fd);
// Removes the cache file. Presumably this is cleanup after a failed write; the
// enclosing handler is not visible — TODO confirm.
200 unlink(d_resourceCacheFileName.c_str());
// Save the response headers to the header file, one per line, so a later cache hit
// can read them back (see the std::getline loop above).
209 std::ofstream hdr_out(hdr_filename.c_str());
211 for(
size_t i=0; i<this->d_response_headers->size() ;i++){
212 hdr_out << (*d_response_headers)[i] << endl;
// Removes both the header file and the cache file. Presumably cleanup after a
// failure while saving the headers; the enclosing handler is not visible.
218 unlink(hdr_filename.c_str());
219 unlink(d_resourceCacheFileName.c_str());
// Cache bookkeeping after a successful fetch. Only the debug messages are visible;
// the calls that convert the lock, update the cache info and purge are not.
229 BESDEBUG(MODULE, prolog <<
"Converted exclusive cache lock to shared lock." << endl);
235 BESDEBUG(MODULE, prolog <<
"Updated cache info" << endl);
239 BESDEBUG(MODULE, prolog <<
"Updated and purged cache." << endl);
241 BESDEBUG(MODULE, prolog <<
"END" << endl);
242 d_initialized =
true;
// A second "in cache" path that only marks the object initialized. Unlike the first
// one, no header reading is visible here.
247 BESDEBUG(MODULE, prolog <<
"Remote resource is in cache. cache_file_name: " << d_resourceCacheFileName << endl);
248 d_initialized =
true;
// Failure to get a read lock is reported as a libdap::Error.
// NOTE(review): the message opens a single quote before the URL but never closes it
// (the URL is followed directly by "\n").
253 string msg = prolog +
"Failed to acquire cache read lock for remote resource: '";
254 msg += d_remoteResourceUrl +
"\n";
255 throw libdap::Error(msg);
// Tail of a debug statement in the exception handler. Per the message, the handler
// unlocks the cache and re-throws; those statements are not visible in this excerpt.
260 "RemoteHttpResource::retrieveResource() - Caught exception, unlocking cache and re-throw." << endl);
// Fetches d_remoteResourceUrl with read_url() into the open file descriptor fd,
// rewinds the descriptor to the start of the file and then parses the response
// headers with ingest_http_headers_and_type().
//
// @param fd  Open file descriptor that receives the response body.
//
// Throws libdap::Error when the HTTP request returns an error status, and BESError
// (BES_NOT_FOUND_ERROR) when the descriptor cannot be rewound.
//
// NOTE(review): this view of the file is missing lines (the leading numbers are the
// original file's line numbers). The try block, the declaration of status, the
// conditions that guard the throws and the catch body are not visible.
275 void RemoteHttpResource::writeResourceToFile(
int fd) {
276 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
// Tail of a debug statement announcing the download; its first line is not visible.
281 "RemoteHttpResource::writeResourceToFile() - Saving resource " << d_remoteResourceUrl <<
" to cache file " << d_resourceCacheFileName << endl);
// Perform the transfer. read_url() receives the curl handle, the URL, the target
// descriptor, both header vectors and the error buffer; its result is kept in status.
283 status = read_url(d_curl, d_remoteResourceUrl, fd, d_response_headers, d_request_headers, d_error_buffer);
// Error path: the threshold that makes a status count as an error is not visible.
286 BESDEBUG(MODULE, prolog <<
"HTTP returned an error status: " << status << endl);
// Build the error text from the URL, the numeric status and its description.
// NOTE(review): there is no space between the closing quote of the URL and "The",
// so the message reads "...<url>'The HTTP request...".
288 string msg =
"Error while reading the URL: '";
289 msg += d_remoteResourceUrl;
290 msg +=
"'The HTTP request returned a status of " + libdap::long_to_string(status) +
" which means '";
291 msg += http_status_to_string(status) +
"' \n";
292 throw libdap::Error(msg);
294 BESDEBUG(MODULE, prolog <<
"Resource " << d_remoteResourceUrl <<
" saved to cache file " << d_resourceCacheFileName << endl);
// Rewind the descriptor to offset 0 so the saved response can be read from the start.
300 int status = lseek(fd, 0, SEEK_SET);
// Thrown when the seek fails; the check on status is not visible in this excerpt.
302 throw BESError(
"Could not seek within the response.", BES_NOT_FOUND_ERROR, __FILE__, __LINE__);
303 BESDEBUG(MODULE, prolog <<
"Reset file descriptor." << endl);
// Turn the collected response headers into the key/value map and settle d_type.
305 ingest_http_headers_and_type();
// libdap::Error is caught here; the handler body is not visible in this excerpt.
307 catch (libdap::Error &e) {
310 BESDEBUG(MODULE, prolog <<
"END" << endl);
// Splits every line in d_response_headers at ": " into a key/value pair stored in
// d_http_response_headers, then works out the data type of the response. The type is
// taken from the content-disposition header first, then from the content-type header
// if no type was found, and the URL is also consulted; the final debug message
// reports d_type.
//
// NOTE(review): this view of the file is missing lines (the leading numbers are the
// original file's line numbers). The declarations of key, type, cdisp_hdr and
// ctype_hdr, several closing braces, the guard around the URL fallback and the
// assignment to d_type are not visible.
314 void RemoteHttpResource::ingest_http_headers_and_type(){
315 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
// Separator between a header name and its value.
317 const string colon_space =
": ";
318 for(
size_t i=0; i<this->d_response_headers->size() ;i++){
// NOTE(review): find() returns std::string::npos for a line without ": " and no
// check for that is visible here — confirm such lines are handled as intended.
319 size_t colon_index = (*d_response_headers)[i].find(colon_space);
// The value is everything after the separator. The key is computed on a line that is
// not visible; the lookups below use lowercase names, so presumably it is lowercased
// there — TODO confirm.
321 string value = (*d_response_headers)[i].substr(colon_index + colon_space.length());
322 BESDEBUG(MODULE, prolog <<
"key: " << key <<
" value: " << value << endl);
// A later header with the same key overwrites the earlier value.
323 (*d_http_response_headers)[key] = value;
331 std::map<string,string>::iterator it;
// Pick out the two headers that can identify the data type, when present.
333 it = d_http_response_headers->find(
"content-disposition");
334 if(it != d_http_response_headers->end()){
335 cdisp_hdr = it->second;
338 it = d_http_response_headers->find(
"content-type");
339 if(it != d_http_response_headers->end()){
340 ctype_hdr = it->second;
// First choice: derive the type from the content-disposition header.
343 if (!cdisp_hdr.empty()) {
346 CmrUtils::Get_type_from_disposition(cdisp_hdr, type);
347 BESDEBUG(MODULE,prolog <<
"Evaluated content-disposition '" << cdisp_hdr <<
"' matched type: \"" << type <<
"\"" << endl);
// Second choice: only if no type was found yet, try the content-type header.
354 if (type.empty() && !ctype_hdr.empty()) {
355 CmrUtils::Get_type_from_content_type(ctype_hdr, type);
356 BESDEBUG(MODULE,prolog <<
"Evaluated content-type '" << ctype_hdr <<
"' matched type \"" << type <<
"\"" << endl);
// Fallback: derive the type from the URL. Its guard (presumably type.empty()) is not
// visible in this excerpt.
362 CmrUtils::Get_type_from_url(d_remoteResourceUrl, type);
363 BESDEBUG(MODULE,prolog <<
"Evaluated url '" << d_remoteResourceUrl <<
"' matched type: \"" << type <<
"\"" << endl);
// Last resort: per the message the type becomes 'unknown'. The text is only sent to
// the debug log here; the assignment itself is not visible.
368 string err = prolog +
"Unable to determine the type of data"
369 +
" returned from '" + d_remoteResourceUrl +
"' Setting type to 'unknown'";
370 BESDEBUG(MODULE, err << endl);
375 BESDEBUG(MODULE, prolog <<
"END (dataset type: "<< d_type <<
")" << endl);
384 RemoteHttpResource::get_http_response_header(
const std::string header_name){
386 std::map<string,string>::iterator it;
388 if(it != d_http_response_headers->end())