bes  Updated for version 3.20.6
CmrUtils.cc
1 // CmrUtils.cc
2 
3 // -*- mode: c++; c-basic-offset:4 -*-
4 
5 // This file is part of gateway_module, A C++ module that can be loaded in to
6 // the OPeNDAP Back-End Server (BES) and is able to handle remote requests.
7 
8 // Copyright (c) 2002,2003 OPeNDAP, Inc.
9 // Author: Nathan Potter <ndp@opendap.org>
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
25 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
26 
27 #include "config.h"
28 
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32 #include <cstdlib>
33 #include <cstring>
34 #include <map>
35 #include <vector>
36 #include <curl/curl.h>
37 
38 
39 #include <BESUtil.h>
40 #include <BESCatalogUtils.h>
41 #include <BESCatalogList.h>
42 #include <BESCatalog.h>
43 #include <BESRegex.h>
44 #include <TheBESKeys.h>
45 #include <BESInternalError.h>
46 #include <BESDapError.h>
47 #include <BESNotFoundError.h>
48 #include <BESSyntaxUserError.h>
49 #include <BESDebug.h>
50 
51 #include "CmrNames.h"
52 #include "CmrUtils.h"
53 #include "CmrApi.h"
54 
55 #include <GNURegex.h>
56 #include <util.h>
57 
58 using namespace libdap;
59 using namespace cmr;
60 using std::vector;
61 
62 std::map<string, string> CmrUtils::MimeList;
63 string CmrUtils::ProxyProtocol;
64 string CmrUtils::ProxyHost;
65 string CmrUtils::ProxyUser;
66 string CmrUtils::ProxyPassword;
67 string CmrUtils::ProxyUserPW;
68 int CmrUtils::ProxyPort = 0;
69 int CmrUtils::ProxyAuthType = 0;
70 bool CmrUtils::useInternalCache = false;
71 
72 string CmrUtils::NoProxyRegex;
73 
74 #define prolog std::string("CmrUtils::").append(__func__).append("() - ")
75 
76 
77 // Initialization routine for the gateway module for certain parameters
78 // and keys, like the white list, the MimeTypes translation.
79 void CmrUtils::Initialize()
80 {
81  // MimeTypes - translate from a mime type to a module name
82  bool found = false;
83  std::string key = CMR_MIMELIST;
84  std::vector<string> vals;
85  TheBESKeys::TheKeys()->get_values(key, vals, found);
86  if (found && vals.size()) {
87  std::vector<string>::iterator i = vals.begin();
88  std::vector<string>::iterator e = vals.end();
89  for (; i != e; i++) {
90  size_t colon = (*i).find(":");
91  if (colon == string::npos) {
92  string err = (string) "Malformed " + CMR_MIMELIST + " " + (*i)
93  + " specified in the gateway configuration";
94  throw BESSyntaxUserError(err, __FILE__, __LINE__);
95  }
96  string mod = (*i).substr(0, colon);
97  string mime = (*i).substr(colon + 1);
98  MimeList[mod] = mime;
99  }
100  }
101 
102  found = false;
103  key = CMR_PROXYHOST;
104  TheBESKeys::TheKeys()->get_value(key, CmrUtils::ProxyHost, found);
105  if (found && !CmrUtils::ProxyHost.empty()) {
106  // if the proxy host is set, then check to see if the port is
107  // set. Does not need to be.
108  found = false;
109  key = CMR_PROXYPORT;
110  string port;
111  TheBESKeys::TheKeys()->get_value(key, port, found);
112  if (found && !port.empty()) {
113  CmrUtils::ProxyPort = atoi(port.c_str());
114  if (!CmrUtils::ProxyPort) {
115  string err = (string) "CMR proxy host is specified, but specified port is absent";
116  throw BESSyntaxUserError(err, __FILE__, __LINE__);
117  }
118  }
119 
120  // @TODO Either use this or remove it - right now this variable is never used downstream
121  // find the protocol to use for the proxy server. If none set,
122  // default to http
123  found = false;
124  key = CMR_PROXYPROTOCOL;
125  TheBESKeys::TheKeys()->get_value(key, CmrUtils::ProxyProtocol, found);
126  if (!found || CmrUtils::ProxyProtocol.empty()) {
127  CmrUtils::ProxyProtocol = "http";
128  }
129 
130  // find the user to use for authenticating with the proxy server. If none set,
131  // default to ""
132  found = false;
133  key = CMR_PROXYUSER;
134  TheBESKeys::TheKeys()->get_value(key, CmrUtils::ProxyUser, found);
135  if (!found) {
136  CmrUtils::ProxyUser = "";
137  }
138 
139  // find the password to use for authenticating with the proxy server. If none set,
140  // default to ""
141  found = false;
142  key = CMR_PROXYPASSWORD;
143  TheBESKeys::TheKeys()->get_value(key, CmrUtils::ProxyPassword, found);
144  if (!found) {
145  CmrUtils::ProxyPassword = "";
146  }
147 
148  // find the user:password string to use for authenticating with the proxy server. If none set,
149  // default to ""
150  found = false;
151  key = CMR_PROXYUSERPW;
152  TheBESKeys::TheKeys()->get_value(key, CmrUtils::ProxyUserPW, found);
153  if (!found) {
154  CmrUtils::ProxyUserPW = "";
155  }
156 
157  // find the authentication mechanism to use with the proxy server. If none set,
158  // default to BASIC authentication.
159  found = false;
160  key = CMR_PROXYAUTHTYPE;
161  string authType;
162  TheBESKeys::TheKeys()->get_value(key, authType, found);
163  if (found) {
164  authType = BESUtil::lowercase(authType);
165  if (authType == "basic") {
166  CmrUtils::ProxyAuthType = CURLAUTH_BASIC;
167  BESDEBUG(MODULE, prolog << "ProxyAuthType BASIC set." << endl);
168  }
169  else if (authType == "digest") {
170  CmrUtils::ProxyAuthType = CURLAUTH_DIGEST;
171  BESDEBUG(MODULE, prolog << "ProxyAuthType DIGEST set." << endl);
172  }
173 
174  else if (authType == "ntlm") {
175  CmrUtils::ProxyAuthType = CURLAUTH_NTLM;
176  BESDEBUG(MODULE, prolog << "ProxyAuthType NTLM set." << endl);
177  }
178  else {
179  CmrUtils::ProxyAuthType = CURLAUTH_BASIC;
180  BESDEBUG(MODULE,
181  prolog << "User supplied an invalid value '"<< authType << "' for Gateway.ProxyAuthType. Falling back to BASIC authentication scheme." << endl);
182  }
183  }
184  else {
185  CmrUtils::ProxyAuthType = CURLAUTH_BASIC;
186  }
187  }
188 
189  found = false;
190  key = CMR_USE_INTERNAL_CACHE;
191  string use_cache;
192  TheBESKeys::TheKeys()->get_value(key, use_cache, found);
193  if (found) {
194  if (use_cache == "true" || use_cache == "TRUE" || use_cache == "True" || use_cache == "yes"
195  || use_cache == "YES" || use_cache == "Yes")
196  CmrUtils::useInternalCache = true;
197  else
198  CmrUtils::useInternalCache = false;
199  }
200  else {
201  // If not set, default to false. Assume squid or ...
202  CmrUtils::useInternalCache = false;
203  }
204  // Grab the value for the NoProxy regex; empty if there is none.
205  found = false; // Not used
206  TheBESKeys::TheKeys()->get_value("Gateway.NoProxy", CmrUtils::NoProxyRegex, found);
207 }
208 
209 
210 
211 void CmrUtils::Get_type_from_disposition(const string &disp, string &type)
212 {
213  size_t fnpos = disp.find("filename");
214  if (fnpos != string::npos) {
215  // Got the filename attribute, now get the
216  // filename, which is after the pound sign (#)
217  size_t pos = disp.find("#", fnpos);
218  if (pos == string::npos) pos = disp.find("=", fnpos);
219  if (pos != string::npos) {
220  // Got the filename to the end of the
221  // string, now get it to either the end of
222  // the string or the start of the next
223  // attribute
224  string filename;
225  size_t sp = disp.find(" ", pos);
226  if (pos != string::npos) {
227  // space before the next attribute
228  filename = disp.substr(pos + 1, sp - pos - 1);
229  }
230  else {
231  // to the end of the string
232  filename = disp.substr(pos + 1);
233  }
234 
235  // now see if it's wrapped in quotes
236  if (filename[0] == '"') {
237  filename = filename.substr(1);
238  }
239  if (filename[filename.length() - 1] == '"') {
240  filename = filename.substr(0, filename.length() - 1);
241  }
242 
243  // we have the filename now, run it through
244  // the type match to get the file type.
245 
247  type = utils->get_handler_name(filename);
248 
249 #if 0
250  const BESCatalogUtils *utils = BESCatalogUtils::Utils("catalog");
251  BESCatalogUtils::match_citer i = utils->match_list_begin();
252  BESCatalogUtils::match_citer ie = utils->match_list_end();
253  bool done = false;
254  for (; i != ie && !done; i++) {
255  BESCatalogUtils::handler_regex match = (*i);
256  try {
257  BESDEBUG(MODULE,
258  prolog << "Comparing disp filename " << filename << " against expr " << match.regex << endl);
259  BESRegex reg_expr(match.regex.c_str());
260  if (reg_expr.match(filename.c_str(), filename.length()) == static_cast<int>(filename.length())) {
261  type = match.handler;
262  done = true;
263  }
264  }
265  catch (Error &e) {
266  string serr = (string) "Unable to match data type, " + "malformed Catalog TypeMatch parameter "
267  + "in bes configuration file around " + match.regex + ": " + e.get_error_message();
268  throw BESDapError(serr, false, e.get_error_code(), __FILE__, __LINE__);
269  }
270  }
271 #endif
272  }
273  }
274 }
275 
276 void CmrUtils::Get_type_from_content_type(const string &ctype, string &type)
277 {
278  BESDEBUG(MODULE, prolog << "BEGIN" << endl);
279  std::map<string, string>::iterator i = MimeList.begin();
280  std::map<string, string>::iterator e = MimeList.end();
281  bool done = false;
282  for (; i != e && !done; i++) {
283  BESDEBUG(MODULE,
284  prolog << "Comparing content type '" << ctype << "' against mime list element '" << (*i).second << "'"<< endl);
285  BESDEBUG(MODULE,
286  prolog << "first: " << (*i).first << " second: " << (*i).second << endl);
287 
288  if ((*i).second == ctype) {
289 
290  BESDEBUG(MODULE, prolog << "MATCH" << endl);
291 
292  type = (*i).first;
293  done = true;
294  }
295  }
296  BESDEBUG(MODULE, "GatewayUtils::Get_type_from_content_type() - END" << endl);
297 }
298 
299 void CmrUtils::Get_type_from_url(const string &url, string &type)
300 {
301  // Just run the url through the type match from the configuration
302 
303  const BESCatalogUtils *utils = BESCatalogList::TheCatalogList()->find_catalog(CMR_CATALOG_NAME)->get_catalog_utils();
304 
305  type = utils->get_handler_name(url);
306 
307 #if 0
308 
309  BESCatalogUtils::match_citer i = utils->match_list_begin();
310  BESCatalogUtils::match_citer ie = utils->match_list_end();
311  bool done = false;
312  for (; i != ie && !done; i++) {
313  BESCatalogUtils::handler_regex match = (*i);
314  try {
315  BESDEBUG(MODULE,
316  prolog << "Comparing url " << url << " against type match expr " << match.regex << endl);
317  BESRegex reg_expr(match.regex.c_str());
318  if (reg_expr.match(url.c_str(), url.length()) == static_cast<int>(url.length())) {
319  type = match.handler;
320  done = true;
321  BESDEBUG(MODULE, prolog << "MATCH type: " << type << endl);
322  }
323  }
324  catch (Error &e) {
325  string serr = (string) "Unable to match data type! Malformed Catalog TypeMatch parameter "
326  + "in bes configuration file around " + match.regex + ": " + e.get_error_message();
327  throw BESInternalError(serr, __FILE__, __LINE__);
328  }
329  }
330 #endif
331 
332 }
333 
#if 0
// Disabled code (excluded from compilation by the surrounding #if 0).
// Returns true when any WhiteList entry is a prefix of url, false otherwise.
bool GatewayUtils::Is_Whitelisted(const std::string &url){
    for (std::vector<std::string>::const_iterator entry = WhiteList.begin(); entry != WhiteList.end(); ++entry) {
        // An entry longer than the url can never be a prefix of it.
        if (entry->length() <= url.length() && url.compare(0, entry->length(), *entry) == 0) {
            return true;
        }
    }
    return false;
}

#endif
350 
351 Granule *
352 CmrUtils::getTemporalFacetGranule(const std::string granule_path)
353 {
354 
355  BESDEBUG(MODULE, prolog << "BEGIN (granule_path: '" << granule_path << ")" << endl);
356 
357  string collection;
358  string facet = "temporal";
359  string year = "-";
360  string month = "-";
361  string day = "-";
362  string granule_id = "-";
363 
364  string path = BESUtil::normalize_path(granule_path,false, false);
365  vector<string> path_elements = BESUtil::split(path);
366  BESDEBUG(MODULE, prolog << "path: '" << path << "' path_elements.size(): " << path_elements.size() << endl);
367 
368  switch(path_elements.size()){
369  case 6:
370  {
371  collection = path_elements[0];
372  BESDEBUG(MODULE, prolog << "collection: '" << collection << endl);
373  facet = path_elements[1];
374  BESDEBUG(MODULE, prolog << "facet: '" << facet << endl);
375  year = path_elements[2];
376  BESDEBUG(MODULE, prolog << "year: '" << year << endl);
377  month = path_elements[3];
378  BESDEBUG(MODULE, prolog << "month: '" << month << endl);
379  day = path_elements[4];
380  BESDEBUG(MODULE, prolog << "day: '" << day << endl);
381  granule_id = path_elements[5];
382  BESDEBUG(MODULE, prolog << "granule_id: '" << granule_id << endl);
383 }
384  break;
385  default:
386  {
387  throw BESNotFoundError("Can't find it man...",__FILE__,__LINE__);
388  }
389  break;
390  }
391  CmrApi cmrApi;
392 
393  return cmrApi.get_granule( collection, year, month, day, granule_id);
394 }
395 
396 
397 
BESNotFoundError
error thrown if the resource requested cannot be found
Definition: BESNotFoundError.h:40
BESCatalogUtils
Definition: BESCatalogUtils.h:61
BESUtil::normalize_path
static std::string normalize_path(const std::string &path, bool leading_separator, bool trailing_separator, const std::string separator="/")
Removes duplicate separators and provides leading and trailing separators as directed.
Definition: BESUtil.cc:1011
BESCatalogList::default_catalog
virtual BESCatalog * default_catalog() const
Get the default catalog.
Definition: BESCatalogList.h:118
libdap
Definition: BESDapFunctionResponseCache.h:35
BESCatalogList::TheCatalogList
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
Definition: BESCatalogList.cc:81
TheBESKeys::TheKeys
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:62
BESCatalogUtils::get_handler_name
std::string get_handler_name(const std::string &item) const
Find the name of the handler that will process the given item.
Definition: BESCatalogUtils.cc:422
BESSyntaxUserError
error thrown if there is a user syntax error in the request or any other user error
Definition: BESSyntaxUserError.h:41
cmr::Granule
Definition: Granule.h:43
cmr::CmrApi
Definition: CmrApi.h:45
BESInternalError
exception thrown if internal error encountered
Definition: BESInternalError.h:43
BESCatalog::get_catalog_utils
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition: BESCatalog.h:113
TheBESKeys::get_value
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:272
TheBESKeys::get_values
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:303
Error
BESUtil::split
static std::vector< std::string > split(const std::string &s, char delim='/', bool skip_empty=true)
Splits the string s into the return vector of tokens using the delimiter delim, skipping empty values when skip_empty is true.
Definition: BESUtil.cc:1125
BESUtil::lowercase
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:200
BESRegex
Definition: BESRegex.h:41
BESDapError
error object created from libdap error objects and can handle those errors
Definition: BESDapError.h:59